Support ‘omp for’ with static chunked schedule kind.
Differential Revision: http://reviews.llvm.org/D7006 llvm-svn: 226795
This commit is contained in:
parent
3f68fae900
commit
df7a8e2bc8
|
@ -848,6 +848,12 @@ bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
|
|||
return Schedule == OMP_sch_static;
|
||||
}
|
||||
|
||||
/// Report whether \a ScheduleKind maps to a dynamic runtime schedule.
///
/// The runtime schedule is looked up as non-chunked, so a static-chunked
/// result is impossible here (asserted). Every schedule other than the plain
/// static one is treated as dynamic.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  const auto RuntimeSched =
      getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
  assert(RuntimeSched != OMP_sch_static_chunked && "cannot be chunked here");
  const bool IsPlainStatic = (RuntimeSched == OMP_sch_static);
  return !IsPlainStatic;
}
|
||||
|
||||
void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
|
||||
OpenMPScheduleClauseKind ScheduleKind,
|
||||
unsigned IVSize, bool IVSigned,
|
||||
|
|
|
@ -320,6 +320,12 @@ public:
|
|||
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
|
||||
bool Chunked) const;
|
||||
|
||||
/// \brief Check if the specified \a ScheduleKind is dynamic.
|
||||
/// A kind is reported as dynamic when its runtime schedule, queried as
/// non-chunked, is anything other than the plain static schedule. The
/// outer-loop emitter (EmitOMPForOuterLoop) consults this check and currently
/// reports dynamic schedules as unsupported.
|
||||
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
|
||||
/// \return true if \a ScheduleKind is not the plain static schedule.
|
||||
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
|
||||
|
||||
/// \brief Call the appropriate runtime routine to initialize it before start
|
||||
/// of loop.
|
||||
///
|
||||
|
|
|
@ -500,6 +500,89 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
|||
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
|
||||
}
|
||||
|
||||
/// Emit the outer ("dispatch") loop of a worksharing loop whose schedule is
/// not static non-chunked.
///
/// Dynamic schedules are reported through ErrorUnsupported and nothing is
/// emitted for them. Otherwise the runtime init call is emitted, followed by
/// an outer loop that wraps the inner loop and advances the bounds by the
/// stride, followed by the runtime finish call.
///
/// \param ScheduleKind Schedule kind taken from the 'schedule' clause.
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; queried for pending cleanups.
/// \param LB, UB, ST, IL Values forwarded to the runtime init call.
/// \param Chunk Chunk value forwarded to the runtime init call; may be null.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &Runtime = CGM.getOpenMPRuntime();
  assert(!Runtime.isStaticNonchunked(ScheduleKind,
                                     /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");
  if (Runtime.isDynamic(ScheduleKind)) {
    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
    return;
  }

  // Shape of the emitted outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  //   while (UB = min(UB, GlobalUB), idx = LB, idx <= UB) {
  //     while (idx <= UB) { BODY; ++idx; } // inner loop
  //     LB = LB + ST;
  //     UB = UB + ST;
  //   }
  //
  const Expr *IterVar = S.getIterationVariable();
  const auto IterVarTy = IterVar->getType();
  const unsigned IterVarBits = getContext().getTypeSize(IterVarTy);
  const bool IterVarIsSigned = IterVarTy->hasSignedIntegerRepresentation();

  Runtime.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IterVarBits,
                         IterVarIsSigned, IL, LB, UB, ST, Chunk);
  auto DispatchEnd = getJumpDestInCurrentScope("omp.dispatch.end");

  // The loop begins with the block that evaluates the condition.
  auto DispatchCond = createBasicBlock("omp.dispatch.cond");
  EmitBlock(DispatchCond);
  LoopStack.push(DispatchCond);

  // UB = min(UB, GlobalUB)
  EmitIgnoredExpr(S.getEnsureUpperBound());
  // IV = LB
  EmitIgnoredExpr(S.getInit());
  // IV <= UB
  llvm::Value *HasWork = EvaluateExprAsBool(S.getCond(false));

  // When cleanups sit between this point and the loop-exit scope, leave the
  // loop through a dedicated staging block instead of jumping straight out.
  const bool StageExit = LoopScope.requiresCleanups();
  auto *ExitTarget = StageExit ? createBasicBlock("omp.dispatch.cleanup")
                               : DispatchEnd.getBlock();

  auto DispatchBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(HasWork, DispatchBody, ExitTarget);
  if (StageExit) {
    EmitBlock(ExitTarget);
    EmitBranchThroughCleanup(DispatchEnd);
  }
  EmitBlock(DispatchBody);

  // Destination used for the increment step.
  auto DispatchInc = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(DispatchEnd, DispatchInc));

  EmitOMPInnerLoop(S, LoopScope);

  EmitBlock(DispatchInc.getBlock());
  BreakContinueStack.pop_back();
  // Advance both bounds by the stride: LB += ST, UB += ST.
  EmitIgnoredExpr(S.getNextLowerBound());
  EmitIgnoredExpr(S.getNextUpperBound());

  EmitBranch(DispatchCond);
  LoopStack.pop();
  // Fall-through block reached once the outer loop is finished.
  EmitBlock(DispatchEnd.getBlock());

  // Notify the runtime that this loop is done.
  Runtime.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
}
|
||||
|
||||
/// \brief Emit a helper variable and return corresponding lvalue.
|
||||
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
|
||||
const DeclRefExpr *Helper) {
|
||||
|
@ -581,8 +664,13 @@ void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
|||
EmitOMPInnerLoop(S, LoopScope);
|
||||
// Tell the runtime we are done.
|
||||
RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
|
||||
} else
|
||||
ErrorUnsupported(&S, "OpenMP loop with requested schedule");
|
||||
} else {
|
||||
// Emit the outer loop, which requests its work chunk [LB..UB] from
|
||||
// runtime and runs the inner loop to process it.
|
||||
EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
|
||||
UB.getAddress(), ST.getAddress(), IL.getAddress(),
|
||||
Chunk);
|
||||
}
|
||||
}
|
||||
// We're now done with the loop, so jump to the continuation block.
|
||||
EmitBranch(ContBlock);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "clang/AST/Type.h"
|
||||
#include "clang/Basic/ABI.h"
|
||||
#include "clang/Basic/CapturedStmt.h"
|
||||
#include "clang/Basic/OpenMPKinds.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Frontend/CodeGenOptions.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
|
@ -2052,6 +2053,11 @@ private:
|
|||
bool SeparateIter = false);
|
||||
void EmitOMPSimdFinal(const OMPLoopDirective &S);
|
||||
void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
|
||||
/// \brief Emit the outer loop of a worksharing loop whose schedule is not
/// static non-chunked. Dynamic schedules are reported as unsupported.
/// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
/// \param S Loop directive being emitted.
/// \param LoopScope Private scope of the loop, checked for pending cleanups.
/// \param LB, UB, ST, IL Values handed to the runtime's loop-init call; the
/// visible caller passes the addresses of its LB, UB, ST and IL lvalues.
/// \param Chunk Chunk value handed to the runtime's loop-init call; a null
/// pointer is treated as "not chunked".
void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
|
||||
const OMPLoopDirective &S,
|
||||
OMPPrivateScope &LoopScope, llvm::Value *LB,
|
||||
llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
|
||||
llvm::Value *Chunk);
|
||||
|
||||
public:
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
//
|
||||
|
@ -87,5 +87,64 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
|
|||
// CHECK: ret void
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{.*void}} @{{.*}}static_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
|
||||
// Test case for 'omp for' with schedule(static, 5) over an unsigned induction
// variable. The FileCheck directives inside the function describe the expected
// IR in order: the static-init runtime call, the outer dispatch loop header,
// the inner loop, the stride update of the bounds, and the static-fini call
// followed by the barrier.
void static_chunked(float *a, float *b, float *c, float *d) {
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
|
||||
#pragma omp for schedule(static, 5)
|
||||
// NOTE(review): the literal 33 in the expected init call is presumably the
// runtime's id for the static chunked schedule (confirm against the runtime
// headers); the trailing 5 matches the chunk size given in the pragma.
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
|
||||
// UB = min(UB, GlobalUB)
// 16908288 is the last logical iteration number of the loop below:
// (2147483647 - 131071) / 127.
|
||||
// CHECK: [[UB:%.+]] = load i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288
|
||||
// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
|
||||
// CHECK: [[UBRESULT:%.+]] = phi i32 [ 16908288, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
|
||||
// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[LB:%.+]] = load i32* [[OMP_LB]]
|
||||
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
|
||||
|
||||
// Outer loop header
// The outer condition is an unsigned "less than or equal" compare of IV
// against UB.
|
||||
// CHECK: [[O_IV:%.+]] = load i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[O_UB:%.+]] = load i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ule i32 [[O_IV]], [[O_UB]]
|
||||
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
|
||||
|
||||
// Loop header
|
||||
// CHECK: [[O_LOOP1_BODY]]
|
||||
// CHECK: [[IV:%.+]] = load i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB]]
|
||||
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
|
||||
for (unsigned i = 131071; i <= 2147483647; i += 127) {
|
||||
// CHECK: [[LOOP1_BODY]]
|
||||
// Start of body: calculate i from IV:
// i = 131071 + IV * 127, mirroring the start value and step of the loop.
|
||||
// CHECK: [[IV1_1:%.+]] = load i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i32 [[IV1_1]], 127
|
||||
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 131071, [[CALC_I_1]]
|
||||
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
|
||||
// ... loop body ...
|
||||
// End of body: store into a[i]:
|
||||
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
|
||||
a[i] = b[i] * c[i] * d[i];
|
||||
// CHECK: [[IV1_2:%.+]] = load i32* [[OMP_IV]]{{.*}}
|
||||
// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1
|
||||
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
|
||||
// CHECK-NEXT: br label %{{.+}}
|
||||
}
|
||||
// CHECK: [[LOOP1_END]]
|
||||
// Update the counters, adding stride
// Both the lower and the upper bound are advanced by the same stride value.
|
||||
// CHECK: [[LB:%.+]] = load i32* [[OMP_LB]]
|
||||
// CHECK-NEXT: [[ST:%.+]] = load i32* [[OMP_ST]]
|
||||
// CHECK-NEXT: [[ADD_LB:%.+]] = add i32 [[LB]], [[ST]]
|
||||
// CHECK-NEXT: store i32 [[ADD_LB]], i32* [[OMP_LB]]
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[ST:%.+]] = load i32* [[OMP_ST]]
|
||||
// CHECK-NEXT: [[ADD_UB:%.+]] = add i32 [[UB]], [[ST]]
|
||||
// CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]]
|
||||
|
||||
// After the outer loop: the runtime is told the loop is finished, then the
// barrier call is expected.
// CHECK: [[O_LOOP1_END]]
|
||||
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
|
||||
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
|
||||
// CHECK: ret void
|
||||
}
|
||||
|
||||
#endif // HEADER
|
||||
|
||||
|
|
Loading…
Reference in New Issue