[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop'

directive.

OpenMP 4.5 defines the 'taskloop' directive and 2 additional clauses,
'grainsize' and 'num_tasks', for this directive. This patch adds codegen
for these clauses.
These clauses are generated as arguments of the '__kmpc_taskloop'
libcall and are encoded in the following way:

void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup);

If 'grainsize' is specified, the 'sched' argument must be set to '1' and
the 'grainsize' argument must be set to the value of the 'grainsize' clause.
If 'num_tasks' is specified, the 'sched' argument must be set to '2' and
the 'grainsize' argument must be set to the value of the 'num_tasks' clause.
Sharing one argument is possible because these 2 clauses are mutually
exclusive and can't be used at the same time on the same directive.
If neither of these clauses is specified, the 'sched' argument must be set
to '0' (and the 'grainsize' argument is passed as '0').

llvm-svn: 267862
This commit is contained in:
Alexey Bataev 2016-04-28 09:15:06 +00:00
parent 35e9ea3812
commit 2b19a6fe53
4 changed files with 42 additions and 21 deletions

View File

@ -3777,7 +3777,8 @@ void CGOpenMPRuntime::emitTaskCall(
void CGOpenMPRuntime::emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
@ -3825,17 +3826,19 @@ void CGOpenMPRuntime::emitTaskLoopCall(
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
UpLoc,
ThreadID,
Data.NewTask,
IfVal,
LBLVal.getPointer(),
UBLVal.getPointer(),
CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
UpLoc, ThreadID, Data.NewTask, IfVal, LBLVal.getPointer(),
UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0),
llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0),
llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
llvm::ConstantInt::getSigned(
CGF.IntTy, Schedule.getPointer()
? Schedule.getInt() ? NumTasks : Grainsize
: NoSchedule),
Schedule.getPointer()
? CGF.Builder.CreateIntCast(Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

View File

@ -858,6 +858,9 @@ public:
/// \param Final Contains either constant bool value, or llvm::Value * of i1
/// type for final clause. If the value is true, the task forces all of its
/// child tasks to become final and included tasks.
/// \param Schedule If Pointer is nullptr, no grainsize/num_tasks clauses were
/// specified. If IntVal is false - it is for grainsize clause, true - for
/// num_tasks clause.
/// \param Nogroup true if nogroup clause was specified, false otherwise.
/// \param NumberOfParts Number of parts in untied taskloops.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
@ -881,9 +884,10 @@ public:
virtual void emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction,
QualType SharedsTy, Address Shareds, const Expr *IfCond,
ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies,
llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> FirstprivateCopies,
ArrayRef<const Expr *> FirstprivateInits);

View File

@ -3362,6 +3362,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// By default the task is not final.
Final.setInt(/*IntVal=*/false);
}
llvm::PointerIntPair<llvm::Value * /*no grainsize/num_tasks=nullptr*/, 1,
bool /*Grainsize=false, NumTasks=true*/>
Schedule;
if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
// grainsize clause
Schedule.setInt(/*IntVal=*/false);
Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
} else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
// num_tasks clause
Schedule.setInt(/*IntVal=*/true);
Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
}
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
// if (PreCond) {
@ -3433,13 +3445,13 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitBlock(ContBlock, true);
}
};
auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final,
auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, &Schedule,
Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
const OMPPrivateDataTy &Data) {
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup,
CGF, S.getLocStart(), S, Data.Tied, Final, Schedule, Nogroup,
Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars,
Data.FirstprivateCopies, Data.FirstprivateInits);

View File

@ -37,8 +37,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
#pragma omp taskloop nogroup
// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
#pragma omp taskloop nogroup grainsize(argc)
for (int i = 0; i < 10; ++i)
;
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
@ -55,9 +56,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
int i;
#pragma omp taskloop if(argc) shared(argc, argv) collapse(2)
#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
for (i = 0; i < argc; ++i)
for (int j = argc; j < argv[argc][argc]; ++j)
;
@ -158,8 +159,9 @@ struct S {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
#pragma omp taskloop shared(c)
// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
#pragma omp taskloop shared(c) num_tasks(a)
for (a = 0; a < c; ++a)
;
}