[OPENMP 4.1] Codegen for ‘simd’ clause in ‘ordered’ directive.

Description.
If the simd clause is specified, the ordered regions encountered by any thread will use only a single SIMD lane to execute the ordered regions in the order of the loop iterations.
Restrictions.
An ordered construct with the simd clause is the only OpenMP construct that can appear in the simd region.

An ordered directive with ‘simd’ clause is generated as an outlined function and corresponding function call to prevent this part of code from vectorization later in backend.

llvm-svn: 248772
This commit is contained in:
Alexey Bataev 2015-09-29 03:48:57 +00:00
parent 0c12a3949e
commit 5f600d6a49
4 changed files with 45 additions and 8 deletions

View File

@ -1548,21 +1548,21 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &OrderedOpGen,
SourceLocation Loc) {
SourceLocation Loc, bool IsThreads) {
// __kmpc_ordered(ident_t *, gtid);
// OrderedOpGen();
// __kmpc_end_ordered(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_ordered
{
CodeGenFunction::RunCleanupsScope Scope(CGF);
CodeGenFunction::RunCleanupsScope Scope(CGF);
if (IsThreads) {
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
// Build a call to __kmpc_end_ordered
CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
llvm::makeArrayRef(Args));
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,

View File

@ -449,7 +449,7 @@ public:
/// ordered region.
virtual void emitOrderedRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &OrderedOpGen,
SourceLocation Loc);
SourceLocation Loc, bool IsThreads);
/// \brief Emit an implicit/explicit barrier for OpenMP threads.
/// \param Kind Directive for which this implicit barrier call must be

View File

@ -1799,13 +1799,33 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
}(), S.getLocStart());
}
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
const CapturedStmt *S) {
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
CGF.CapturedStmtInfo = &CapStmtInfo;
auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
Fn->addFnAttr(llvm::Attribute::NoInline);
return Fn;
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
LexicalScope Scope(*this, S.getSourceRange());
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
auto *C = S.getSingleClause<OMPSIMDClause>();
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
if (C) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
} else {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}
CGF.EnsureInsertPoint();
};
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,

View File

@ -213,5 +213,22 @@ void runtime(float *a, float *b, float *c, float *d) {
// CHECK: ret void
}
float f[10];
// CHECK-LABEL: foo_simd
void foo_simd(int low, int up) {
// CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access !
// CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access !
#pragma omp simd
for (int i = low; i < up; ++i) {
f[i] = 0.0;
#pragma omp ordered simd
f[i] = 1.0;
}
}
// CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
// CHECK: store float 1.000000e+00, float* %{{.+}}, align
// CHECK-NEXT: ret void
#endif // HEADER