From 2b19a6fe539d221472c92715dc90c56cb17854bd Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 28 Apr 2016 09:15:06 +0000 Subject: [PATCH] [OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop' directive. OpenMP 4.5 defines the 'taskloop' directive and 2 additional clauses, 'grainsize' and 'num_tasks', for this directive. This patch adds codegen for these clauses. These clauses are generated as arguments of the '__kmpc_taskloop' libcall and are encoded in the following way: void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup); If 'grainsize' is specified, the 'sched' argument must be set to '1' and the 'grainsize' argument must be set to the value of the 'grainsize' clause. If 'num_tasks' is specified, the 'sched' argument must be set to '2' and the 'grainsize' argument must be set to the value of the 'num_tasks' clause. This is possible because these 2 clauses are mutually exclusive and can't be used at the same time on the same directive. If neither of these clauses is specified, the 'sched' argument must be set to '0'. 
llvm-svn: 267862 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 23 +++++++++++++---------- clang/lib/CodeGen/CGOpenMPRuntime.h | 10 +++++++--- clang/lib/CodeGen/CGStmtOpenMP.cpp | 16 ++++++++++++++-- clang/test/OpenMP/taskloop_codegen.cpp | 14 ++++++++------ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 19a58f06feba..ec939dd9db33 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3777,7 +3777,8 @@ void CGOpenMPRuntime::emitTaskCall( void CGOpenMPRuntime::emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - bool Tied, llvm::PointerIntPair Final, bool Nogroup, + bool Tied, llvm::PointerIntPair Final, + llvm::PointerIntPair Schedule, bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, ArrayRef PrivateCopies, @@ -3825,17 +3826,19 @@ void CGOpenMPRuntime::emitTaskLoopCall( cast(cast(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { - UpLoc, - ThreadID, - Data.NewTask, - IfVal, - LBLVal.getPointer(), - UBLVal.getPointer(), - CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + UpLoc, ThreadID, Data.NewTask, IfVal, LBLVal.getPointer(), + UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0), - llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0), - llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), + llvm::ConstantInt::getSigned( + CGF.IntTy, Schedule.getPointer() + ? Schedule.getInt() ? NumTasks : Grainsize + : NoSchedule), + Schedule.getPointer() + ? 
CGF.Builder.CreateIntCast(Schedule.getPointer(), CGF.Int64Ty, + /*isSigned=*/false) + : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 68fe1ccc981a..19eb3622f0a1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -858,6 +858,9 @@ public: /// \param Final Contains either constant bool value, or llvm::Value * of i1 /// type for final clause. If the value is true, the task forces all of its /// child tasks to become final and included tasks. + /// \param Schedule If Pointer is nullptr, no grainsize/num_tasks clauses were + /// specified. If IntVal is false - it is for grainsize clause, true - for + /// num_tasks clause. /// \param Nogroup true if nogroup clause was specified, false otherwise. /// \param NumberOfParts Number of parts in untied taskloops. 
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 @@ -881,9 +884,10 @@ public: virtual void emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, bool Tied, llvm::PointerIntPair Final, - bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction, - QualType SharedsTy, Address Shareds, const Expr *IfCond, - ArrayRef PrivateVars, ArrayRef PrivateCopies, + llvm::PointerIntPair Schedule, bool Nogroup, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef PrivateVars, + ArrayRef PrivateCopies, ArrayRef FirstprivateVars, ArrayRef FirstprivateCopies, ArrayRef FirstprivateInits); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 9a9fdfcb7f43..ae783f5dde74 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3362,6 +3362,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { // By default the task is not final. 
Final.setInt(/*IntVal=*/false); } + llvm::PointerIntPair + Schedule; + if (const auto* Clause = S.getSingleClause()) { + // grainsize clause + Schedule.setInt(/*IntVal=*/false); + Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); + } else if (const auto* Clause = S.getSingleClause()) { + // num_tasks clause + Schedule.setInt(/*IntVal=*/true); + Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); + } auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { // if (PreCond) { @@ -3433,13 +3445,13 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitBlock(ContBlock, true); } }; - auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, &Schedule, Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn, const OMPPrivateDataTy &Data) { auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); CGF.CGM.getOpenMPRuntime().emitTaskLoopCall( - CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup, + CGF, S.getLocStart(), S, Data.Tied, Final, Schedule, Nogroup, Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars, Data.FirstprivateCopies, Data.FirstprivateInits); diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index f3ae1b122089..02a5bf1af7ab 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -37,8 +37,9 @@ int main(int argc, char **argv) { // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null) -#pragma omp taskloop nogroup +// CHECK: 
[[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +#pragma omp taskloop nogroup grainsize(argc) for (int i = 0; i < 10; ++i) ; // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) @@ -55,9 +56,9 @@ int main(int argc, char **argv) { // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null) int i; -#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) +#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4) for (i = 0; i < argc; ++i) for (int j = argc; j < argv[argc][argc]; ++j) ; @@ -158,8 +159,9 @@ struct S { // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) -#pragma omp taskloop shared(c) +// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64 +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null) +#pragma omp taskloop 
shared(c) num_tasks(a) for (a = 0; a < c; ++a) ; }