[OPENMP] Refactoring of codegen for OpenMP directives.
Refactored API of OpenMPRuntime for compatibility with combined directives. Differential Revision: http://reviews.llvm.org/D8859 llvm-svn: 234564
This commit is contained in:
parent
bd51a6a99f
commit
6f1ffc069b
|
@ -31,37 +31,58 @@ namespace {
|
|||
/// \brief Base class for handling code generation inside OpenMP regions.
|
||||
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
|
||||
public:
|
||||
CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS)
|
||||
: CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {}
|
||||
/// \brief Kinds of OpenMP regions used in codegen.
|
||||
enum CGOpenMPRegionKind {
|
||||
/// \brief Region with outlined function for standalone 'parallel'
|
||||
/// directive.
|
||||
ParallelOutlinedRegion,
|
||||
/// \brief Region with outlined function for standalone 'task' directive.
|
||||
TaskOutlinedRegion,
|
||||
/// \brief Region for constructs that do not require function outlining,
|
||||
/// like 'for', 'sections', 'atomic' etc. directives.
|
||||
InlinedRegion,
|
||||
};
|
||||
|
||||
CGOpenMPRegionInfo(const OMPExecutableDirective &D)
|
||||
: CGCapturedStmtInfo(CR_OpenMP), Directive(D) {}
|
||||
CGOpenMPRegionInfo(const CapturedStmt &CS,
|
||||
const CGOpenMPRegionKind RegionKind,
|
||||
const RegionCodeGenTy &CodeGen)
|
||||
: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
|
||||
CodeGen(CodeGen) {}
|
||||
|
||||
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
|
||||
const RegionCodeGenTy &CodeGen)
|
||||
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
|
||||
CodeGen(CodeGen) {}
|
||||
|
||||
/// \brief Get a variable or parameter for storing global thread id
|
||||
/// inside OpenMP construct.
|
||||
virtual const VarDecl *getThreadIDVariable() const = 0;
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
||||
|
||||
/// \brief Get an LValue for the current ThreadID variable.
|
||||
/// \return LValue for thread id variable. This LValue always has type int32*.
|
||||
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
||||
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
|
||||
|
||||
static bool classof(const CGCapturedStmtInfo *Info) {
|
||||
return Info->getKind() == CR_OpenMP;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// \brief OpenMP executable directive associated with the region.
|
||||
const OMPExecutableDirective &Directive;
|
||||
CGOpenMPRegionKind RegionKind;
|
||||
const RegionCodeGenTy &CodeGen;
|
||||
};
|
||||
|
||||
/// \brief API for captured statement code generation in OpenMP constructs.
|
||||
class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
|
||||
public:
|
||||
CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D,
|
||||
const CapturedStmt &CS, const VarDecl *ThreadIDVar)
|
||||
: CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) {
|
||||
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
|
||||
const RegionCodeGenTy &CodeGen)
|
||||
: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
|
||||
ThreadIDVar(ThreadIDVar) {
|
||||
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
||||
}
|
||||
/// \brief Get a variable or parameter for storing global thread id
|
||||
|
@ -69,9 +90,16 @@ public:
|
|||
virtual const VarDecl *getThreadIDVariable() const override {
|
||||
return ThreadIDVar;
|
||||
}
|
||||
|
||||
/// \brief Get the name of the capture helper.
|
||||
StringRef getHelperName() const override { return ".omp_outlined."; }
|
||||
|
||||
static bool classof(const CGCapturedStmtInfo *Info) {
|
||||
return CGOpenMPRegionInfo::classof(Info) &&
|
||||
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
||||
ParallelOutlinedRegion;
|
||||
}
|
||||
|
||||
private:
|
||||
/// \brief A variable or parameter storing global thread id for OpenMP
|
||||
/// constructs.
|
||||
|
@ -81,12 +109,11 @@ private:
|
|||
/// \brief API for captured statement code generation in OpenMP constructs.
|
||||
class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
|
||||
public:
|
||||
CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
|
||||
const CapturedStmt &CS,
|
||||
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar)
|
||||
: CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
|
||||
PartIDVar(PartIDVar) {
|
||||
const RegionCodeGenTy &CodeGen)
|
||||
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
|
||||
ThreadIDVar(ThreadIDVar) {
|
||||
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
|
||||
}
|
||||
/// \brief Get a variable or parameter for storing global thread id
|
||||
|
@ -98,28 +125,28 @@ public:
|
|||
/// \brief Get an LValue for the current ThreadID variable.
|
||||
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
|
||||
|
||||
/// \brief Get the name of the capture helper.
|
||||
StringRef getHelperName() const override { return ".omp_outlined."; }
|
||||
|
||||
static bool classof(const CGCapturedStmtInfo *Info) {
|
||||
return CGOpenMPRegionInfo::classof(Info) &&
|
||||
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
|
||||
TaskOutlinedRegion;
|
||||
}
|
||||
|
||||
private:
|
||||
/// \brief A variable or parameter storing global thread id for OpenMP
|
||||
/// constructs.
|
||||
const VarDecl *ThreadIDVar;
|
||||
/// \brief A variable or parameter storing part id for OpenMP tasking
|
||||
/// constructs.
|
||||
const VarDecl *PartIDVar;
|
||||
};
|
||||
|
||||
/// \brief API for inlined captured statement code generation in OpenMP
|
||||
/// constructs.
|
||||
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
|
||||
public:
|
||||
CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D,
|
||||
CodeGenFunction::CGCapturedStmtInfo *OldCSI)
|
||||
: CGOpenMPRegionInfo(D), OldCSI(OldCSI),
|
||||
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
|
||||
const RegionCodeGenTy &CodeGen)
|
||||
: CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
|
||||
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
|
||||
/// \brief Retrieve the value of the context parameter.
|
||||
virtual llvm::Value *getContextValue() const override {
|
||||
|
@ -127,6 +154,13 @@ public:
|
|||
return OuterRegionInfo->getContextValue();
|
||||
llvm_unreachable("No context value for inlined OpenMP region");
|
||||
}
|
||||
virtual void setContextValue(llvm::Value *V) override {
|
||||
if (OuterRegionInfo) {
|
||||
OuterRegionInfo->setContextValue(V);
|
||||
return;
|
||||
}
|
||||
llvm_unreachable("No context value for inlined OpenMP region");
|
||||
}
|
||||
/// \brief Lookup the captured field decl for a variable.
|
||||
virtual const FieldDecl *lookup(const VarDecl *VD) const override {
|
||||
if (OuterRegionInfo)
|
||||
|
@ -149,16 +183,48 @@ public:
|
|||
|
||||
/// \brief Get the name of the capture helper.
|
||||
virtual StringRef getHelperName() const override {
|
||||
if (auto *OuterRegionInfo = getOldCSI())
|
||||
return OuterRegionInfo->getHelperName();
|
||||
llvm_unreachable("No helper name for inlined OpenMP construct");
|
||||
}
|
||||
|
||||
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
|
||||
|
||||
static bool classof(const CGCapturedStmtInfo *Info) {
|
||||
return CGOpenMPRegionInfo::classof(Info) &&
|
||||
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
|
||||
}
|
||||
|
||||
private:
|
||||
/// \brief CodeGen info about outer OpenMP region.
|
||||
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
|
||||
CGOpenMPRegionInfo *OuterRegionInfo;
|
||||
};
|
||||
|
||||
/// \brief RAII for emitting code of OpenMP constructs.
|
||||
class InlinedOpenMPRegionRAII {
|
||||
// Function whose CapturedStmtInfo is swapped for the lifetime of this object.
CodeGenFunction &CGF;
|
||||
|
||||
public:
|
||||
/// \brief Constructs region for combined constructs.
|
||||
/// \param CGF Function in which the inlined region is emitted.
/// \param CodeGen Code generation sequence for combined directives. Includes
|
||||
/// a list of functions used for code generation of implicitly inlined
|
||||
/// regions.
|
||||
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
|
||||
: CGF(CGF) {
|
||||
// Start emission for the construct.
|
||||
// The previously installed info is handed to the new inlined-region info as
// its OldCSI, so the destructor can put it back.
CGF.CapturedStmtInfo =
|
||||
new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
|
||||
}
|
||||
/// \brief Deletes the info object allocated by the constructor and
/// reinstalls the one that was active before it.
~InlinedOpenMPRegionRAII() {
|
||||
// Restore original CapturedStmtInfo only if we're done with code emission.
|
||||
// The cast expects CGF.CapturedStmtInfo to still be the inlined-region info
// installed by the constructor. Fetch the saved pointer before deleting.
auto *OldCSI =
|
||||
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
|
||||
delete CGF.CapturedStmtInfo;
|
||||
CGF.CapturedStmtInfo = OldCSI;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
|
||||
|
@ -172,15 +238,18 @@ LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
|
|||
->getPointeeType());
|
||||
}
|
||||
|
||||
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
|
||||
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
||||
CGF.EmitOMPPrivateClause(Directive, PrivateScope);
|
||||
CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
|
||||
if (PrivateScope.Privatize())
|
||||
// Emit implicit barrier to synchronize threads and avoid data races.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
|
||||
OMPD_unknown);
|
||||
CGCapturedStmtInfo::EmitBody(CGF, S);
|
||||
/// \brief Emits the region body by invoking the stored CodeGen callback on
/// \a CGF. The statement argument is unused; the callback decides what to
/// emit.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
|
||||
// 1.2.2 OpenMP Language Terminology
|
||||
// Structured block - An executable statement with a single entry at the
|
||||
// top and a single exit at the bottom.
|
||||
// The point of exit cannot be a branch out of the structured block.
|
||||
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||
CGF.EHStack.pushTerminate();
|
||||
{
|
||||
// The scope object is destroyed at the closing brace below, i.e. before
// the matching popTerminate() call.
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
CodeGen(CGF);
|
||||
}
|
||||
CGF.EHStack.popTerminate();
|
||||
}
|
||||
|
||||
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
|
||||
|
@ -190,14 +259,6 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
|
|||
getThreadIDVariable()->getType());
|
||||
}
|
||||
|
||||
void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
|
||||
const Stmt *S) {
|
||||
if (PartIDVar) {
|
||||
// TODO: emit code for untied tasks.
|
||||
}
|
||||
CGCapturedStmtInfo::EmitBody(CGF, S);
|
||||
}
|
||||
|
||||
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
|
||||
: CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
|
||||
IdentTy = llvm::StructType::create(
|
||||
|
@ -216,13 +277,14 @@ void CGOpenMPRuntime::clear() {
|
|||
}
|
||||
|
||||
llvm::Value *
|
||||
CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar) {
|
||||
CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const RegionCodeGenTy &CodeGen) {
|
||||
assert(ThreadIDVar->getType()->isPointerType() &&
|
||||
"thread id variable must be of type kmp_int32 *");
|
||||
const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
CodeGenFunction CGF(CGM, true);
|
||||
CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
|
||||
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
|
||||
CGF.CapturedStmtInfo = &CGInfo;
|
||||
return CGF.GenerateCapturedStmtFunction(*CS);
|
||||
}
|
||||
|
@ -230,12 +292,12 @@ CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
|
|||
llvm::Value *
|
||||
CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar) {
|
||||
const RegionCodeGenTy &CodeGen) {
|
||||
assert(!ThreadIDVar->getType()->isPointerType() &&
|
||||
"thread id variable must be of type kmp_int32 for tasks");
|
||||
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
CodeGenFunction CGF(CGM, true);
|
||||
CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
|
||||
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
|
||||
CGF.CapturedStmtInfo = &CGInfo;
|
||||
return CGF.GenerateCapturedStmtFunction(*CS);
|
||||
}
|
||||
|
@ -906,9 +968,21 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
|
|||
return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitCriticalRegion(
|
||||
CodeGenFunction &CGF, StringRef CriticalName,
|
||||
const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
|
||||
namespace {
|
||||
/// \brief EH-stack cleanup whose emission is delegated to a stored
/// code-generation callback: Emit() simply invokes that callback on the
/// CodeGenFunction it is given.
class CallEndCleanup : public EHScopeStack::Cleanup {
|
||||
private:
|
||||
// Callback invoked from Emit(). RegionCodeGenTy is a typedef for
// llvm::function_ref<void(CodeGenFunction &)>, so this member is a
// non-owning view of the callable, not a copy of it.
// NOTE(review): the callable must still be alive when Emit() runs. The call
// sites in this file hand lambda temporaries to pushCleanup - confirm that
// their lifetime really extends until the cleanup is emitted.
const RegionCodeGenTy CodeGen;
|
||||
|
||||
public:
|
||||
/// \param CodeGen Callback that emits the code for this cleanup.
CallEndCleanup(const RegionCodeGenTy &CodeGen) : CodeGen(CodeGen) {}
|
||||
/// \brief Emits the cleanup by calling the stored callback; the flags
/// argument is ignored.
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { CodeGen(CGF); }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
|
||||
StringRef CriticalName,
|
||||
const RegionCodeGenTy &CriticalOpGen,
|
||||
SourceLocation Loc) {
|
||||
auto RegionLock = getCriticalRegionLock(CriticalName);
|
||||
// __kmpc_critical(ident_t *, gtid, Lock);
|
||||
// CriticalOpGen();
|
||||
|
@ -916,14 +990,21 @@ void CGOpenMPRuntime::emitCriticalRegion(
|
|||
// Prepare arguments and build a call to __kmpc_critical
|
||||
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
||||
RegionLock};
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
|
||||
CriticalOpGen();
|
||||
// Build a call to __kmpc_end_critical
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
|
||||
{
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
|
||||
emitInlinedDirective(CGF, CriticalOpGen);
|
||||
// Build a call to __kmpc_end_critical
|
||||
CGF.EHStack.pushCleanup<CallEndCleanup>(
|
||||
NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) {
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical),
|
||||
Args);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
|
||||
const std::function<void()> &BodyOpGen) {
|
||||
const RegionCodeGenTy &BodyOpGen) {
|
||||
llvm::Value *CallBool = CGF.EmitScalarConversion(
|
||||
IfCond,
|
||||
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
|
||||
|
@ -934,14 +1015,14 @@ static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
|
|||
// Generate the branch (If-stmt)
|
||||
CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
|
||||
CGF.EmitBlock(ThenBlock);
|
||||
BodyOpGen();
|
||||
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
|
||||
// Emit the rest of bblocks/branches
|
||||
CGF.EmitBranch(ContBlock);
|
||||
CGF.EmitBlock(ContBlock, true);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
|
||||
const std::function<void()> &MasterOpGen,
|
||||
const RegionCodeGenTy &MasterOpGen,
|
||||
SourceLocation Loc) {
|
||||
// if(__kmpc_master(ident_t *, gtid)) {
|
||||
// MasterOpGen();
|
||||
|
@ -951,12 +1032,14 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
|
|||
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
||||
auto *IsMaster =
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
|
||||
emitIfStmt(CGF, IsMaster, [&]() -> void {
|
||||
MasterOpGen();
|
||||
emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
MasterOpGen(CGF);
|
||||
// Build a call to __kmpc_end_master.
|
||||
// OpenMP [1.2.2 OpenMP Language Terminology]
|
||||
// For C/C++, an executable statement, possibly compound, with a single
|
||||
// entry at the top and a single exit at the bottom, or an OpenMP construct.
|
||||
// entry at the top and a single exit at the bottom, or an OpenMP
|
||||
// construct.
|
||||
// * Access to the structured block must not be the result of a branch.
|
||||
// * The point of exit cannot be a branch out of the structured block.
|
||||
// * The point of entry must not be a call to setjmp().
|
||||
|
@ -967,7 +1050,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
|
|||
// structured block.
|
||||
// It is analyzed in Sema; the call to __kmpc_end_master() is emitted by the
|
||||
// cleanup pushed below rather than as a direct call on fallthrough.
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
|
||||
// Build a call to __kmpc_end_master
|
||||
CGF.EHStack.pushCleanup<CallEndCleanup>(
|
||||
NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) {
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master),
|
||||
Args);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -1046,7 +1134,7 @@ static llvm::Value *emitCopyprivateCopyFunction(
|
|||
}
|
||||
|
||||
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
||||
const std::function<void()> &SingleOpGen,
|
||||
const RegionCodeGenTy &SingleOpGen,
|
||||
SourceLocation Loc,
|
||||
ArrayRef<const Expr *> CopyprivateVars,
|
||||
ArrayRef<const Expr *> SrcExprs,
|
||||
|
@ -1076,8 +1164,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|||
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
||||
auto *IsSingle =
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
|
||||
emitIfStmt(CGF, IsSingle, [&]() -> void {
|
||||
SingleOpGen();
|
||||
emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
SingleOpGen(CGF);
|
||||
if (DidIt) {
|
||||
// did_it = 1;
|
||||
CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
|
||||
|
@ -1097,7 +1186,11 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|||
// structured block.
|
||||
// It is analyzed in Sema; the call to __kmpc_end_single() is emitted by the
|
||||
// cleanup pushed below rather than as a direct call on fallthrough.
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
|
||||
CGF.EHStack.pushCleanup<CallEndCleanup>(
|
||||
NormalAndEHCleanup, [this, Args](CodeGenFunction &CGF) {
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single),
|
||||
Args);
|
||||
});
|
||||
});
|
||||
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
||||
// <copy_func>, did_it);
|
||||
|
@ -1277,6 +1370,7 @@ void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
|
|||
assert((ScheduleKind == OMPC_SCHEDULE_static ||
|
||||
ScheduleKind == OMPC_SCHEDULE_unknown) &&
|
||||
"Non-static schedule kinds are not yet implemented");
|
||||
(void)ScheduleKind;
|
||||
// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
|
||||
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
|
||||
getThreadID(CGF, Loc)};
|
||||
|
@ -1522,23 +1616,9 @@ void CGOpenMPRuntime::emitTaskCall(
|
|||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
|
||||
}
|
||||
|
||||
InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
|
||||
CodeGenFunction &CGF, const OMPExecutableDirective &D)
|
||||
: CGF(CGF) {
|
||||
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
|
||||
// 1.2.2 OpenMP Language Terminology
|
||||
// Structured block - An executable statement with a single entry at the
|
||||
// top and a single exit at the bottom.
|
||||
// The point of exit cannot be a branch out of the structured block.
|
||||
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||
CGF.EHStack.pushTerminate();
|
||||
}
|
||||
|
||||
InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
|
||||
CGF.EHStack.popTerminate();
|
||||
auto *OldCSI =
|
||||
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
|
||||
delete CGF.CapturedStmtInfo;
|
||||
CGF.CapturedStmtInfo = OldCSI;
|
||||
/// \brief Emits a directive that needs no outlined function: installs an
/// inlined-region info on \a CGF for the duration of the call and emits the
/// body through it.
/// \param CGF Function in which the code is emitted.
/// \param CodeGen Callback that generates the code for the region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
|
||||
const RegionCodeGenTy &CodeGen) {
|
||||
// Swaps CGF.CapturedStmtInfo for an inlined-region info; the previous value
// is restored when Region goes out of scope.
InlinedOpenMPRegionRAII Region(CGF, CodeGen);
|
||||
// No statement is passed; the region info installed above was constructed
// with CodeGen.
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,7 +43,10 @@ namespace CodeGen {
|
|||
class CodeGenFunction;
|
||||
class CodeGenModule;
|
||||
|
||||
typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy;
|
||||
|
||||
class CGOpenMPRuntime {
|
||||
private:
|
||||
enum OpenMPRTLFunction {
|
||||
/// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
|
||||
/// kmpc_micro microtask, ...);
|
||||
|
@ -284,25 +287,27 @@ public:
|
|||
virtual ~CGOpenMPRuntime() {}
|
||||
virtual void clear();
|
||||
|
||||
/// \brief Emits outlined function for the specified OpenMP directive \a D.
|
||||
/// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
|
||||
/// BoundID, struct context_vars*).
|
||||
/// \brief Emits outlined function for the specified OpenMP parallel directive
|
||||
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
|
||||
/// kmp_int32 BoundID, struct context_vars*).
|
||||
/// \param D OpenMP directive.
|
||||
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
|
||||
///
|
||||
virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar);
|
||||
/// \param CodeGen Code generation sequence for the \a D directive.
|
||||
virtual llvm::Value *
|
||||
emitParallelOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const RegionCodeGenTy &CodeGen);
|
||||
|
||||
/// \brief Emits outlined function for the OpenMP task directive \a D. This
|
||||
/// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
|
||||
/// PartID, struct context_vars*).
|
||||
/// \param D OpenMP directive.
|
||||
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
|
||||
/// \param PartIDVar If not nullptr - variable used for part id in tasks.
|
||||
/// \param CodeGen Code generation sequence for the \a D directive.
|
||||
///
|
||||
virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
|
||||
const VarDecl *ThreadIDVar,
|
||||
const VarDecl *PartIDVar);
|
||||
const RegionCodeGenTy &CodeGen);
|
||||
|
||||
/// \brief Cleans up references to the objects in finished function.
|
||||
///
|
||||
|
@ -334,14 +339,14 @@ public:
|
|||
/// \param CriticalOpGen Generator for the statement associated with the given
|
||||
/// critical region.
|
||||
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
|
||||
const std::function<void()> &CriticalOpGen,
|
||||
const RegionCodeGenTy &CriticalOpGen,
|
||||
SourceLocation Loc);
|
||||
|
||||
/// \brief Emits a master region.
|
||||
/// \param MasterOpGen Generator for the statement associated with the given
|
||||
/// master region.
|
||||
virtual void emitMasterRegion(CodeGenFunction &CGF,
|
||||
const std::function<void()> &MasterOpGen,
|
||||
const RegionCodeGenTy &MasterOpGen,
|
||||
SourceLocation Loc);
|
||||
|
||||
/// \brief Emits code for a taskyield directive.
|
||||
|
@ -351,7 +356,7 @@ public:
|
|||
/// \param SingleOpGen Generator for the statement associated with the given
|
||||
/// single region.
|
||||
virtual void emitSingleRegion(CodeGenFunction &CGF,
|
||||
const std::function<void()> &SingleOpGen,
|
||||
const RegionCodeGenTy &SingleOpGen,
|
||||
SourceLocation Loc,
|
||||
ArrayRef<const Expr *> CopyprivateVars,
|
||||
ArrayRef<const Expr *> SrcExprs,
|
||||
|
@ -506,17 +511,13 @@ public:
|
|||
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
|
||||
llvm::Value *TaskFunction, QualType SharedsTy,
|
||||
llvm::Value *Shareds);
|
||||
/// \brief Emit code for the directive that does not require outlining.
|
||||
///
|
||||
/// \param CodeGen Code generation sequence for the directive.
|
||||
virtual void emitInlinedDirective(CodeGenFunction &CGF,
|
||||
const RegionCodeGenTy &CodeGen);
|
||||
};
|
||||
|
||||
/// \brief RAII for emitting code of CapturedStmt without function outlining.
|
||||
class InlinedOpenMPRegionRAII {
|
||||
CodeGenFunction &CGF;
|
||||
|
||||
public:
|
||||
InlinedOpenMPRegionRAII(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D);
|
||||
~InlinedOpenMPRegionRAII();
|
||||
};
|
||||
} // namespace CodeGen
|
||||
} // namespace clang
|
||||
|
||||
|
|
|
@ -23,21 +23,6 @@ using namespace CodeGen;
|
|||
//===----------------------------------------------------------------------===//
|
||||
// OpenMP Directive Emission
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace {
|
||||
/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp
|
||||
/// critical' etc.). Helps to generate proper debug info and provides correct
|
||||
/// code generation for such constructs.
|
||||
class InlinedOpenMPRegionScopeRAII {
|
||||
InlinedOpenMPRegionRAII Region;
|
||||
CodeGenFunction::LexicalScope DirectiveScope;
|
||||
|
||||
public:
|
||||
InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D)
|
||||
: Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
|
||||
/// function. Here is the logic:
|
||||
/// if (Cond) {
|
||||
|
@ -225,8 +210,8 @@ void CodeGenFunction::EmitOMPPrivateClause(
|
|||
}
|
||||
|
||||
/// \brief Emits code for OpenMP parallel directive in the parallel region.
|
||||
static void EmitOMPParallelCall(CodeGenFunction &CGF,
|
||||
const OMPParallelDirective &S,
|
||||
static void emitOMPParallelCall(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &S,
|
||||
llvm::Value *OutlinedFn,
|
||||
llvm::Value *CapturedStruct) {
|
||||
if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
|
||||
|
@ -241,22 +226,43 @@ static void EmitOMPParallelCall(CodeGenFunction &CGF,
|
|||
CapturedStruct);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
||||
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &S,
|
||||
const RegionCodeGenTy &CodeGen) {
|
||||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
||||
auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
|
||||
S, *CS->getCapturedDecl()->param_begin());
|
||||
auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
|
||||
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
|
||||
S, *CS->getCapturedDecl()->param_begin(), CodeGen);
|
||||
if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
|
||||
auto Cond = cast<OMPIfClause>(C)->getCondition();
|
||||
EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
|
||||
EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
|
||||
if (ThenBlock)
|
||||
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
|
||||
emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
|
||||
else
|
||||
CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
|
||||
OutlinedFn, CapturedStruct);
|
||||
CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
|
||||
OutlinedFn, CapturedStruct);
|
||||
});
|
||||
} else
|
||||
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
|
||||
emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
|
||||
}
|
||||
|
||||
/// \brief Emits code for the parallel directive \a S. The body generation is
/// packaged as a callback and handed to emitCommonOMPParallelDirective().
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
// Emit parallel region as a standalone region.
|
||||
// The callback captures S by reference and emits into the CodeGenFunction
// it is invoked with.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
OMPPrivateScope PrivateScope(CGF);
|
||||
CGF.EmitOMPPrivateClause(S, PrivateScope);
|
||||
CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
||||
// The barrier is emitted only when Privatize() returns true.
if (PrivateScope.Privatize())
|
||||
// Emit implicit barrier to synchronize threads and avoid data races.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
||||
OMPD_unknown);
|
||||
// Emit the statement captured by the directive's associated CapturedStmt.
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
// Emit implicit barrier at the end of the 'parallel' directive.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
|
||||
OMPD_unknown);
|
||||
};
|
||||
emitCommonOMPParallelDirective(*this, S, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
|
||||
|
@ -289,10 +295,10 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
|
|||
}
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
|
||||
const Expr *LoopCond,
|
||||
const Expr *IncExpr,
|
||||
const std::function<void()> &BodyGen) {
|
||||
void CodeGenFunction::EmitOMPInnerLoop(
|
||||
const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
||||
const Expr *IncExpr,
|
||||
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
|
||||
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
|
||||
auto Cnt = getPGORegionCounter(&S);
|
||||
|
||||
|
@ -323,7 +329,7 @@ void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
|
|||
auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
|
||||
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
||||
|
||||
BodyGen();
|
||||
BodyGen(*this);
|
||||
|
||||
// Emit "IV = IV + 1" and a back-edge to the condition block.
|
||||
EmitBlock(Continue.getBlock());
|
||||
|
@ -414,129 +420,132 @@ EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
|
|||
}
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff hunk rather than
// compilable source. Two versions of the body appear to be interleaved: one
// that calls CodeGenFunction members directly (EmitVarDecl, LoopStack, ...)
// and one that wraps the same steps in a `CodeGen` lambda taking
// `CodeGenFunction &CGF`. The lone "|" / "||||" lines look like table residue
// from the page this was extracted from. Confirm against the repository.
//
/// Emit code for the OpenMP 'simd' directive \a S.
///
/// Steps visible in the body, in order:
///  - walk the clauses: 'safelen' sets the vectorizer width and clears the
///    "parallel" loop marking, 'aligned' is forwarded to EmitOMPAlignedClause,
///    'lastprivate' sets SeparateIter;
///  - emit the init declarations of 'linear' clauses, the iteration variable
///    and its init, the last-iteration variable (only when it is a
///    DeclRefExpr) and the pre-calculated linear steps;
///  - emit the inner loop inside an OMPPrivateScope, either guarded by
///    S.getPreCond() with a separately emitted last iteration (SeparateIter)
///    or as a single loop, followed by EmitOMPSimdFinal(S).
/// In the lambda-based version the whole sequence is handed to
/// CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen).
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
||||
// Pragma 'simd' code depends on presence of 'lastprivate'.
|
||||
// If present, we have to separate last iteration of the loop:
|
||||
//
|
||||
// if (LastIteration != 0) {
|
||||
// for (IV in 0..LastIteration-1) BODY;
|
||||
// BODY with updates of lastprivate vars;
|
||||
// <Final counter/linear vars updates>;
|
||||
// }
|
||||
//
|
||||
// otherwise (when there's no lastprivate):
|
||||
//
|
||||
// for (IV in 0..LastIteration) BODY;
|
||||
// <Final counter/linear vars updates>;
|
||||
//
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
// Pragma 'simd' code depends on presence of 'lastprivate'.
|
||||
// If present, we have to separate last iteration of the loop:
|
||||
//
|
||||
// if (LastIteration != 0) {
|
||||
// for (IV in 0..LastIteration-1) BODY;
|
||||
// BODY with updates of lastprivate vars;
|
||||
// <Final counter/linear vars updates>;
|
||||
// }
|
||||
//
|
||||
// otherwise (when there's no lastprivate):
|
||||
//
|
||||
// for (IV in 0..LastIteration) BODY;
|
||||
// <Final counter/linear vars updates>;
|
||||
//
|
||||
|
||||
// Walk clauses and process safelen/lastprivate.
|
||||
bool SeparateIter = false;
|
||||
LoopStack.setParallel();
|
||||
LoopStack.setVectorizerEnable(true);
|
||||
for (auto C : S.clauses()) {
|
||||
switch (C->getClauseKind()) {
|
||||
case OMPC_safelen: {
|
||||
RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
|
||||
AggValueSlot::ignored(), true);
|
||||
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
||||
LoopStack.setVectorizerWidth(Val->getZExtValue());
|
||||
// In presence of finite 'safelen', it may be unsafe to mark all
|
||||
// the memory instructions parallel, because loop-carried
|
||||
// dependences of 'safelen' iterations are possible.
|
||||
LoopStack.setParallel(false);
|
||||
break;
|
||||
}
|
||||
case OMPC_aligned:
|
||||
EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
|
||||
break;
|
||||
case OMPC_lastprivate:
|
||||
SeparateIter = true;
|
||||
break;
|
||||
default:
|
||||
// Not handled yet
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
|
||||
// Emit inits for the linear variables.
|
||||
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
||||
for (auto Init : C->inits()) {
|
||||
auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
|
||||
EmitVarDecl(*D);
|
||||
}
|
||||
}
|
||||
|
||||
// Emit the loop iteration variable.
|
||||
const Expr *IVExpr = S.getIterationVariable();
|
||||
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
|
||||
EmitVarDecl(*IVDecl);
|
||||
EmitIgnoredExpr(S.getInit());
|
||||
|
||||
// Emit the iterations count variable.
|
||||
// If it is not a variable, Sema decided to calculate iterations count on each
|
||||
// iteration (e.g., it is foldable into a constant).
|
||||
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
||||
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
||||
// Emit calculation of the iterations count.
|
||||
EmitIgnoredExpr(S.getCalcLastIteration());
|
||||
}
|
||||
|
||||
// Emit the linear steps for the linear clauses.
|
||||
// If a step is not constant, it is pre-calculated before the loop.
|
||||
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
||||
if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
|
||||
// NOTE(review): cast<> is used as an `if` condition here; per the LLVM
// casting utilities it asserts on a type mismatch instead of yielding null,
// so this test looks always-true — confirm whether dyn_cast<> was intended.
if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
|
||||
EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
|
||||
// Emit calculation of the linear step.
|
||||
EmitIgnoredExpr(CS);
|
||||
// Walk clauses and process safelen/lastprivate.
|
||||
bool SeparateIter = false;
|
||||
CGF.LoopStack.setParallel();
|
||||
CGF.LoopStack.setVectorizerEnable(true);
|
||||
for (auto C : S.clauses()) {
|
||||
switch (C->getClauseKind()) {
|
||||
case OMPC_safelen: {
|
||||
RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
|
||||
AggValueSlot::ignored(), true);
|
||||
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
|
||||
CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
|
||||
// In presence of finite 'safelen', it may be unsafe to mark all
|
||||
// the memory instructions parallel, because loop-carried
|
||||
// dependences of 'safelen' iterations are possible.
|
||||
CGF.LoopStack.setParallel(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
case OMPC_aligned:
|
||||
EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
|
||||
break;
|
||||
case OMPC_lastprivate:
|
||||
SeparateIter = true;
|
||||
break;
|
||||
default:
|
||||
// Not handled yet
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
if (SeparateIter) {
|
||||
// Emit: if (LastIteration > 0) - begin.
|
||||
RegionCounter Cnt = getPGORegionCounter(&S);
|
||||
auto ThenBlock = createBasicBlock("simd.if.then");
|
||||
auto ContBlock = createBasicBlock("simd.if.end");
|
||||
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
|
||||
EmitBlock(ThenBlock);
|
||||
Cnt.beginRegion(Builder);
|
||||
// Emit 'then' code.
|
||||
{
|
||||
OMPPrivateScope LoopScope(*this);
|
||||
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
|
||||
EmitPrivateLinearVars(*this, S, LoopScope);
|
||||
EmitOMPPrivateClause(S, LoopScope);
|
||||
(void)LoopScope.Privatize();
|
||||
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/true), S.getInc(),
|
||||
[&S, this]() {
|
||||
EmitOMPLoopBody(S);
|
||||
EmitStopPoint(&S);
|
||||
});
|
||||
EmitOMPLoopBody(S, /* SeparateIter */ true);
|
||||
// Emit inits for the linear variables.
|
||||
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
||||
for (auto Init : C->inits()) {
|
||||
auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
|
||||
CGF.EmitVarDecl(*D);
|
||||
}
|
||||
}
|
||||
EmitOMPSimdFinal(S);
|
||||
// Emit: if (LastIteration != 0) - end.
|
||||
EmitBranch(ContBlock);
|
||||
EmitBlock(ContBlock, true);
|
||||
} else {
|
||||
{
|
||||
OMPPrivateScope LoopScope(*this);
|
||||
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
|
||||
EmitPrivateLinearVars(*this, S, LoopScope);
|
||||
EmitOMPPrivateClause(S, LoopScope);
|
||||
(void)LoopScope.Privatize();
|
||||
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
||||
[&S, this]() {
|
||||
EmitOMPLoopBody(S);
|
||||
EmitStopPoint(&S);
|
||||
});
|
||||
|
||||
// Emit the loop iteration variable.
|
||||
const Expr *IVExpr = S.getIterationVariable();
|
||||
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
|
||||
CGF.EmitVarDecl(*IVDecl);
|
||||
CGF.EmitIgnoredExpr(S.getInit());
|
||||
|
||||
// Emit the iterations count variable.
|
||||
// If it is not a variable, Sema decided to calculate iterations count on
|
||||
// each
|
||||
// iteration (e.g., it is foldable into a constant).
|
||||
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
|
||||
CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
|
||||
// Emit calculation of the iterations count.
|
||||
CGF.EmitIgnoredExpr(S.getCalcLastIteration());
|
||||
}
|
||||
EmitOMPSimdFinal(S);
|
||||
}
|
||||
|
||||
// Emit the linear steps for the linear clauses.
|
||||
// If a step is not constant, it is pre-calculated before the loop.
|
||||
for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
|
||||
if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
|
||||
// NOTE(review): same pattern as in the member-call version — cast<> in an
// `if` condition never produces null; confirm whether dyn_cast<> was meant.
if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
|
||||
CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
|
||||
// Emit calculation of the linear step.
|
||||
CGF.EmitIgnoredExpr(CS);
|
||||
}
|
||||
}
|
||||
|
||||
if (SeparateIter) {
|
||||
// Emit: if (LastIteration > 0) - begin.
|
||||
RegionCounter Cnt = CGF.getPGORegionCounter(&S);
|
||||
auto ThenBlock = CGF.createBasicBlock("simd.if.then");
|
||||
auto ContBlock = CGF.createBasicBlock("simd.if.end");
|
||||
// NOTE(review): the branch condition actually emitted is S.getPreCond(); the
// neighbouring comments describe it once as "LastIteration > 0" and once as
// "LastIteration != 0" — confirm which wording matches the pre-condition.
CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
|
||||
Cnt.getCount());
|
||||
CGF.EmitBlock(ThenBlock);
|
||||
Cnt.beginRegion(CGF.Builder);
|
||||
// Emit 'then' code.
|
||||
{
|
||||
OMPPrivateScope LoopScope(CGF);
|
||||
EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
|
||||
EmitPrivateLinearVars(CGF, S, LoopScope);
|
||||
CGF.EmitOMPPrivateClause(S, LoopScope);
|
||||
(void)LoopScope.Privatize();
|
||||
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/true), S.getInc(),
|
||||
[&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitOMPLoopBody(S);
|
||||
CGF.EmitStopPoint(&S);
|
||||
});
|
||||
CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
|
||||
}
|
||||
CGF.EmitOMPSimdFinal(S);
|
||||
// Emit: if (LastIteration != 0) - end.
|
||||
CGF.EmitBranch(ContBlock);
|
||||
CGF.EmitBlock(ContBlock, true);
|
||||
} else {
|
||||
{
|
||||
OMPPrivateScope LoopScope(CGF);
|
||||
EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
|
||||
EmitPrivateLinearVars(CGF, S, LoopScope);
|
||||
CGF.EmitOMPPrivateClause(S, LoopScope);
|
||||
(void)LoopScope.Privatize();
|
||||
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
||||
[&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitOMPLoopBody(S);
|
||||
CGF.EmitStopPoint(&S);
|
||||
});
|
||||
}
|
||||
CGF.EmitOMPSimdFinal(S);
|
||||
}
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
||||
|
@ -654,9 +663,10 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
|||
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
|
||||
|
||||
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
|
||||
EmitOMPLoopBody(S);
|
||||
EmitStopPoint(&S);
|
||||
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
||||
[&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitOMPLoopBody(S);
|
||||
CGF.EmitStopPoint(&S);
|
||||
});
|
||||
|
||||
EmitBlock(Continue.getBlock());
|
||||
|
@ -759,9 +769,9 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
|||
// while (idx <= UB) { BODY; ++idx; }
|
||||
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
|
||||
S.getCond(/*SeparateIter=*/false), S.getInc(),
|
||||
[&S, this]() {
|
||||
EmitOMPLoopBody(S);
|
||||
EmitStopPoint(&S);
|
||||
[&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitOMPLoopBody(S);
|
||||
CGF.EmitStopPoint(&S);
|
||||
});
|
||||
// Tell the runtime we are done.
|
||||
RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
|
||||
|
@ -780,9 +790,10 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
|
||||
EmitOMPWorksharingLoop(S);
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen =
|
||||
[&S](CodeGenFunction &CGF) { CGF.EmitOMPWorksharingLoop(S); };
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
|
||||
// Emit an implicit barrier at the end.
|
||||
if (!S.getSingleClause(OMPC_nowait)) {
|
||||
|
@ -804,86 +815,94 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
|
||||
auto *CS = dyn_cast<CompoundStmt>(Stmt);
|
||||
if (CS && CS->size() > 1) {
|
||||
auto &C = CGM.getContext();
|
||||
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
||||
// Emit helper vars inits.
|
||||
LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
|
||||
Builder.getInt32(0));
|
||||
auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
|
||||
LValue UB =
|
||||
createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
|
||||
LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
|
||||
Builder.getInt32(1));
|
||||
LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
|
||||
Builder.getInt32(0));
|
||||
// Loop counter.
|
||||
LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
|
||||
OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
|
||||
OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
|
||||
// Generate condition for loop.
|
||||
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
|
||||
OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
|
||||
// Increment for loop counter.
|
||||
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
|
||||
S.getLocStart());
|
||||
auto BodyGen = [this, CS, &S, &IV]() {
|
||||
// Iterate through all sections and emit a switch construct:
|
||||
// switch (IV) {
|
||||
// case 0:
|
||||
// <SectionStmt[0]>;
|
||||
// break;
|
||||
// ...
|
||||
// case <NumSection> - 1:
|
||||
// <SectionStmt[<NumSection> - 1]>;
|
||||
// break;
|
||||
// }
|
||||
// .omp.sections.exit:
|
||||
auto *ExitBB = createBasicBlock(".omp.sections.exit");
|
||||
auto *SwitchStmt = Builder.CreateSwitch(
|
||||
EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
|
||||
CS->size());
|
||||
unsigned CaseNumber = 0;
|
||||
for (auto C = CS->children(); C; ++C, ++CaseNumber) {
|
||||
auto CaseBB = createBasicBlock(".omp.sections.case");
|
||||
EmitBlock(CaseBB);
|
||||
SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
|
||||
EmitStmt(*C);
|
||||
EmitBranch(ExitBB);
|
||||
}
|
||||
EmitBlock(ExitBB, /*IsFinished=*/true);
|
||||
auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
|
||||
auto &C = CGF.CGM.getContext();
|
||||
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
||||
// Emit helper vars inits.
|
||||
LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
|
||||
CGF.Builder.getInt32(0));
|
||||
auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
|
||||
LValue UB =
|
||||
createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
|
||||
LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
|
||||
CGF.Builder.getInt32(1));
|
||||
LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
|
||||
CGF.Builder.getInt32(0));
|
||||
// Loop counter.
|
||||
LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
|
||||
OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
|
||||
OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
|
||||
OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
|
||||
// Generate condition for loop.
|
||||
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
|
||||
OK_Ordinary, S.getLocStart(),
|
||||
/*fpContractable=*/false);
|
||||
// Increment for loop counter.
|
||||
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
|
||||
OK_Ordinary, S.getLocStart());
|
||||
auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
|
||||
// Iterate through all sections and emit a switch construct:
|
||||
// switch (IV) {
|
||||
// case 0:
|
||||
// <SectionStmt[0]>;
|
||||
// break;
|
||||
// ...
|
||||
// case <NumSection> - 1:
|
||||
// <SectionStmt[<NumSection> - 1]>;
|
||||
// break;
|
||||
// }
|
||||
// .omp.sections.exit:
|
||||
auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
|
||||
auto *SwitchStmt = CGF.Builder.CreateSwitch(
|
||||
CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
|
||||
CS->size());
|
||||
unsigned CaseNumber = 0;
|
||||
for (auto C = CS->children(); C; ++C, ++CaseNumber) {
|
||||
auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
|
||||
CGF.EmitBlock(CaseBB);
|
||||
SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
|
||||
CGF.EmitStmt(*C);
|
||||
CGF.EmitBranch(ExitBB);
|
||||
}
|
||||
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
|
||||
};
|
||||
// Emit static non-chunked loop.
|
||||
CGF.CGM.getOpenMPRuntime().emitForInit(
|
||||
CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
|
||||
/*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
|
||||
ST.getAddress());
|
||||
// UB = min(UB, GlobalUB);
|
||||
auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
|
||||
auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
|
||||
CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
|
||||
CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
|
||||
// IV = LB;
|
||||
CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
|
||||
// while (idx <= UB) { BODY; ++idx; }
|
||||
CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
|
||||
// Tell the runtime we are done.
|
||||
CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
|
||||
OMPC_SCHEDULE_static);
|
||||
};
|
||||
// Emit static non-chunked loop.
|
||||
CGM.getOpenMPRuntime().emitForInit(
|
||||
*this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
|
||||
/*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
|
||||
ST.getAddress());
|
||||
// UB = min(UB, GlobalUB);
|
||||
auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
|
||||
auto *MinUBGlobalUB = Builder.CreateSelect(
|
||||
Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
|
||||
EmitStoreOfScalar(MinUBGlobalUB, UB);
|
||||
// IV = LB;
|
||||
EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
|
||||
// while (idx <= UB) { BODY; ++idx; }
|
||||
EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
|
||||
// Tell the runtime we are done.
|
||||
CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
|
||||
OMPC_SCHEDULE_static);
|
||||
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
} else {
|
||||
// If only one section is found - no need to generate loop, emit as a single
|
||||
// If only one section is found - no need to generate loop, emit as a
|
||||
// single
|
||||
// region.
|
||||
CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
EmitStmt(Stmt);
|
||||
EnsureInsertPoint();
|
||||
}, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(
|
||||
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
||||
llvm::None, llvm::None, llvm::None,
|
||||
llvm::None);
|
||||
}
|
||||
|
||||
// Emit an implicit barrier at the end.
|
||||
|
@ -895,9 +914,12 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
|
|||
}
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff hunk. The three lines
// starting with InlinedOpenMPRegionScopeRAII appear to be one version of the
// body and the LexicalScope/CodeGen lines another; the "|" / "||||" lines look
// like extraction residue. Confirm against the repository.
//
/// Emit code for the OpenMP 'section' directive \a S.
///
/// Both visible variants emit the statement captured by the directive and then
/// call EnsureInsertPoint(). The lambda-based variant opens a LexicalScope over
/// the directive's source range and passes the emission callback to
/// CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen).
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
EnsureInsertPoint();
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
||||
|
@ -905,7 +927,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
llvm::SmallVector<const Expr *, 8> SrcExprs;
|
||||
llvm::SmallVector<const Expr *, 8> DstExprs;
|
||||
llvm::SmallVector<const Expr *, 8> AssignmentOps;
|
||||
// Check if there are any 'copyprivate' clauses associated with this 'single'
|
||||
// Check if there are any 'copyprivate' clauses associated with this
|
||||
// 'single'
|
||||
// construct.
|
||||
auto CopyprivateFilter = [](const OMPClause *C) -> bool {
|
||||
return C->getClauseKind() == OMPC_copyprivate;
|
||||
|
@ -923,12 +946,15 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
AssignmentOps.append(C->assignment_ops().begin(),
|
||||
C->assignment_ops().end());
|
||||
}
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
// Emit code for 'single' region along with 'copyprivate' clauses
|
||||
CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
EnsureInsertPoint();
|
||||
}, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
||||
CopyprivateVars, SrcExprs, DstExprs,
|
||||
AssignmentOps);
|
||||
// Emit an implicit barrier at the end.
|
||||
if (!S.getSingleClause(OMPC_nowait)) {
|
||||
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
|
||||
|
@ -936,20 +962,22 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
}
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff hunk. The call that
// passes an inline `[&]() -> void` lambda appears to be one version of the
// body and the LexicalScope/CodeGen lines another; the "|" / "||||" lines look
// like extraction residue. Confirm against the repository.
//
/// Emit code for the OpenMP 'master' directive \a S.
///
/// Both visible variants hand a callback to
/// CGM.getOpenMPRuntime().emitMasterRegion() together with S.getLocStart().
/// The callback emits the statement captured by the directive and then calls
/// EnsureInsertPoint(). The variant that builds `CodeGen` additionally opens a
/// LexicalScope over the directive's source range and captures only \a S,
/// receiving the CodeGenFunction as a parameter.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
|
||||
CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
EnsureInsertPoint();
|
||||
}, S.getLocStart());
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
|
||||
}
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff hunk. Two argument lists
// for the same emitCriticalRegion( call are present — one with an inline
// `[&]() -> void` lambda, one passing `CodeGen` — so they appear to belong to
// different versions of the body; the "|" / "||||" lines look like extraction
// residue. Confirm against the repository.
//
/// Emit code for the OpenMP 'critical' directive \a S.
///
/// A callback that emits the statement captured by the directive and then
/// calls EnsureInsertPoint() is passed to
/// CGM.getOpenMPRuntime().emitCriticalRegion(), along with the directive name
/// as a string (S.getDirectiveName().getAsString()) and S.getLocStart().
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitCriticalRegion(
|
||||
*this, S.getDirectiveName().getAsString(), [&]() -> void {
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
EnsureInsertPoint();
|
||||
}, S.getLocStart());
|
||||
*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -969,13 +997,21 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
|
|||
|
||||
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
|
||||
// Emit outlined function for task construct.
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
|
||||
auto *I = CS->getCapturedDecl()->param_begin();
|
||||
auto *PartId = std::next(I);
|
||||
// The first function argument for tasks is a thread id, the second one is a
|
||||
// part id (0 for tied tasks, >=0 for untied task).
|
||||
auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
|
||||
if (*PartId) {
|
||||
// TODO: emit code for untied tasks.
|
||||
}
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
};
|
||||
auto OutlinedFn =
|
||||
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
|
||||
CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
|
||||
// Check if we should emit tied or untied task.
|
||||
bool Tied = !S.getSingleClause(OMPC_untied);
|
||||
// Check if the task is final
|
||||
|
@ -1305,10 +1341,13 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
|
|||
S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
|
||||
if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
|
||||
enterFullExpression(EWC);
|
||||
InlinedOpenMPRegionScopeRAII Region(*this, S);
|
||||
|
||||
EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
|
||||
S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
|
||||
EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
|
||||
S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
|
||||
|
|
|
@ -192,7 +192,7 @@ public:
|
|||
|
||||
CapturedRegionKind getKind() const { return Kind; }
|
||||
|
||||
void setContextValue(llvm::Value *V) { ThisValue = V; }
|
||||
virtual void setContextValue(llvm::Value *V) { ThisValue = V; }
|
||||
// \brief Retrieve the value of the context parameter.
|
||||
virtual llvm::Value *getContextValue() const { return ThisValue; }
|
||||
|
||||
|
@ -2061,9 +2061,10 @@ private:
|
|||
/// Helpers for the OpenMP loop directives.
|
||||
void EmitOMPLoopBody(const OMPLoopDirective &Directive,
|
||||
bool SeparateIter = false);
|
||||
void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
|
||||
const Expr *LoopCond, const Expr *IncExpr,
|
||||
const std::function<void()> &BodyGen);
|
||||
void
|
||||
EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
|
||||
const Expr *IncExpr,
|
||||
const llvm::function_ref<void(CodeGenFunction &)> &BodyGen);
|
||||
void EmitOMPSimdFinal(const OMPLoopDirective &S);
|
||||
void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
|
||||
void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
||||
|
|
|
@ -53,6 +53,6 @@ void parallel_critical() {
|
|||
// TERM_DEBUG: unreachable
|
||||
foo();
|
||||
}
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: 44,
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: 44,
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]],
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]],
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@ void parallel_master() {
|
|||
// TERM_DEBUG: unreachable
|
||||
foo();
|
||||
}
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: 52,
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: 52,
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]],
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]],
|
||||
|
||||
#endif
|
||||
|
|
|
@ -39,7 +39,7 @@ int main (int argc, char **argv) {
|
|||
// CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i32* {{%[a-z0-9.]+}}, i32** [[ARGC_REF]]
|
||||
// CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
|
||||
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
|
||||
// CHECK-NEXT: [[RET:%.+]] = call {{[a-z]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
|
||||
// CHECK-NEXT: ret i32 [[RET]]
|
||||
|
@ -55,13 +55,13 @@ int main (int argc, char **argv) {
|
|||
// CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4
|
||||
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
|
||||
// CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
|
||||
// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
|
||||
// CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
|
||||
// CHECK-DEBUG-NEXT: ret i32 [[RET]]
|
||||
// CHECK-DEBUG-NEXT: }
|
||||
|
||||
// CHECK-LABEL: define internal void @.omp_outlined.(i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
|
||||
// CHECK: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
|
||||
// CHECK: #[[FN_ATTRS:[0-9]+]]
|
||||
// CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon*
|
||||
// CHECK: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]]
|
||||
|
@ -70,11 +70,12 @@ int main (int argc, char **argv) {
|
|||
// CHECK-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]]
|
||||
// CHECK-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]]
|
||||
// CHECK-NEXT: invoke void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[ARGC]])
|
||||
// CHECK: call {{.+}} @__kmpc_cancel_barrier(
|
||||
// CHECK: ret void
|
||||
// CHECK: call void @{{.+terminate.*|abort}}(
|
||||
// CHECK-NEXT: unreachable
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-DEBUG-LABEL: define internal void @.omp_outlined.(i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
|
||||
// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
|
||||
// CHECK-DEBUG: #[[FN_ATTRS:[0-9]+]]
|
||||
// CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon*
|
||||
// CHECK-DEBUG: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]]
|
||||
|
@ -83,6 +84,7 @@ int main (int argc, char **argv) {
|
|||
// CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]]
|
||||
// CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]]
|
||||
// CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[ARGC]])
|
||||
// CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier(
|
||||
// CHECK-DEBUG: ret void
|
||||
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
|
||||
// CHECK-DEBUG-NEXT: unreachable
|
||||
|
@ -98,7 +100,7 @@ int main (int argc, char **argv) {
|
|||
// CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0
|
||||
// CHECK-NEXT: store i8*** {{%[a-z0-9.]+}}, i8**** [[ARGC_REF]]
|
||||
// CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8*
|
||||
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined.1 to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-NEXT: ret i32 0
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc)
|
||||
|
@ -112,11 +114,11 @@ int main (int argc, char **argv) {
|
|||
// CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4
|
||||
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
|
||||
// CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8*
|
||||
// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined.1 to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...)* @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
|
||||
// CHECK-DEBUG-NEXT: ret i32 0
|
||||
// CHECK-DEBUG-NEXT: }
|
||||
|
||||
// CHECK-LABEL: define internal void @.omp_outlined.1(i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
|
||||
// CHECK: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
|
||||
// CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0*
|
||||
// CHECK: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]]
|
||||
// CHECK: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]]
|
||||
|
@ -124,11 +126,12 @@ int main (int argc, char **argv) {
|
|||
// CHECK-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]]
|
||||
// CHECK-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
|
||||
// CHECK-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
|
||||
// CHECK: call {{.+}} @__kmpc_cancel_barrier(
|
||||
// CHECK: ret void
|
||||
// CHECK: call void @{{.+terminate.*|abort}}(
|
||||
// CHECK-NEXT: unreachable
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-DEBUG-LABEL: define internal void @.omp_outlined.1(i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
|
||||
// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
|
||||
// CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0*
|
||||
// CHECK-DEBUG: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]]
|
||||
// CHECK-DEBUG: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]]
|
||||
|
@ -136,6 +139,7 @@ int main (int argc, char **argv) {
|
|||
// CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]]
|
||||
// CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
|
||||
// CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
|
||||
// CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier(
|
||||
// CHECK-DEBUG: ret void
|
||||
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
|
||||
// CHECK-DEBUG-NEXT: unreachable
|
||||
|
|
|
@ -96,6 +96,7 @@ int main() {
|
|||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SINGLE_LOC]],
|
||||
// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(
|
||||
// CHECK-NEXT: ret
|
||||
// CHECK: [[TERM_LPAD]]
|
||||
// CHECK: call void @__clang_call_terminate(i8*
|
||||
|
|
|
@ -129,6 +129,6 @@ void parallel_single() {
|
|||
foo();
|
||||
}
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-12]],
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-13]],
|
||||
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-3]],
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue