[OPENMP 4.1] Codegen for ‘simd’ clause in ‘ordered’ directive.
Description. If the simd clause is specified, the ordered regions encountered by any thread will use only a single SIMD lane to execute the ordered regions in the order of the loop iterations. Restrictions. An ordered construct with the simd clause is the only OpenMP construct that can appear in the simd region. An ordered directive with the ‘simd’ clause is generated as an outlined function (marked noinline) and a corresponding call to that function, which prevents this part of the code from being vectorized later in the backend. llvm-svn: 248772
This commit is contained in:
parent
0c12a3949e
commit
5f600d6a49
|
@ -1548,21 +1548,21 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|||
|
||||
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
|
||||
const RegionCodeGenTy &OrderedOpGen,
|
||||
SourceLocation Loc) {
|
||||
SourceLocation Loc, bool IsThreads) {
|
||||
// __kmpc_ordered(ident_t *, gtid);
|
||||
// OrderedOpGen();
|
||||
// __kmpc_end_ordered(ident_t *, gtid);
|
||||
// Prepare arguments and build a call to __kmpc_ordered
|
||||
{
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
if (IsThreads) {
|
||||
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
|
||||
// Build a call to __kmpc_end_ordered
|
||||
CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
|
||||
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
|
||||
llvm::makeArrayRef(Args));
|
||||
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
|
||||
}
|
||||
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
|
||||
|
|
|
@ -449,7 +449,7 @@ public:
|
|||
/// ordered region.
|
||||
virtual void emitOrderedRegion(CodeGenFunction &CGF,
|
||||
const RegionCodeGenTy &OrderedOpGen,
|
||||
SourceLocation Loc);
|
||||
SourceLocation Loc, bool IsThreads);
|
||||
|
||||
/// \brief Emit an implicit/explicit barrier for OpenMP threads.
|
||||
/// \param Kind Directive for which this implicit barrier call must be
|
||||
|
|
|
@ -1799,13 +1799,33 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
|
|||
}(), S.getLocStart());
|
||||
}
|
||||
|
||||
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
|
||||
const CapturedStmt *S) {
|
||||
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
|
||||
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
|
||||
CGF.CapturedStmtInfo = &CapStmtInfo;
|
||||
auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
|
||||
Fn->addFnAttr(llvm::Attribute::NoInline);
|
||||
return Fn;
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
auto *C = S.getSingleClause<OMPSIMDClause>();
|
||||
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
|
||||
if (C) {
|
||||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
||||
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
|
||||
CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
|
||||
} else {
|
||||
CGF.EmitStmt(
|
||||
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
}
|
||||
CGF.EnsureInsertPoint();
|
||||
};
|
||||
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
|
||||
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
|
||||
}
|
||||
|
||||
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
|
||||
|
|
|
@ -213,5 +213,22 @@ void runtime(float *a, float *b, float *c, float *d) {
|
|||
// CHECK: ret void
|
||||
}
|
||||
|
||||
float f[10];
|
||||
// CHECK-LABEL: foo_simd
|
||||
void foo_simd(int low, int up) {
|
||||
// CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access !
|
||||
// CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access !
|
||||
#pragma omp simd
|
||||
for (int i = low; i < up; ++i) {
|
||||
f[i] = 0.0;
|
||||
#pragma omp ordered simd
|
||||
f[i] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
|
||||
// CHECK: store float 1.000000e+00, float* %{{.+}}, align
|
||||
// CHECK-NEXT: ret void
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
|
|
Loading…
Reference in New Issue