[OPENMP] Generalization of calls of the outlined functions.

General improvement of the calls to outlined functions.

llvm-svn: 310840
This commit is contained in:
Alexey Bataev 2017-08-14 15:01:03 +00:00
parent e1dde07640
commit 3c595a6b2c
6 changed files with 65 additions and 51 deletions

View File

@ -2447,7 +2447,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
@ -3348,14 +3348,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_unreg",
[&](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
Desc);
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
Desc);
});
auto *RegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_reg",
[&](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
Desc);
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib),
Desc);
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
});
if (CGM.supportsCOMDAT()) {
@ -3859,7 +3859,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
}
CallArgs.push_back(SharedsParam);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskFunction, CallArgs);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
@ -4534,8 +4535,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
PrePostActionTy &) {
NumDependencies, &DepWaitTaskArgs,
Loc](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
@ -4546,11 +4547,11 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
DepWaitTaskArgs);
// Call proxy_task_entry(gtid, new_task);
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
CodeGenFunction &CGF, PrePostActionTy &Action) {
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskEntry,
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
OutlinedFnArgs);
};
@ -7035,7 +7036,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
CGF.EmitBlock(OffloadFailedBlock);
emitOutlinedFunctionCall(CGF, OutlinedFn, KernelArgs);
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
@ -7755,16 +7756,25 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(RTLFn, Args);
}
void CGOpenMPRuntime::emitOutlinedFunctionCall(
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args) const {
if (auto *Fn = dyn_cast<llvm::Function>(OutlinedFn)) {
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
ArrayRef<llvm::Value *> Args,
SourceLocation Loc) const {
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
if (Fn->doesNotThrow()) {
CGF.EmitNounwindRuntimeCall(OutlinedFn, Args);
CGF.EmitNounwindRuntimeCall(Fn, Args);
return;
}
}
CGF.EmitRuntimeCall(OutlinedFn, Args);
CGF.EmitRuntimeCall(Callee, Args);
}
void CGOpenMPRuntime::emitOutlinedFunctionCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args) const {
assert(Loc.isValid() && "Outlined function call location must be valid.");
emitCall(CGF, OutlinedFn, Args, Loc);
}
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,

View File

@ -250,6 +250,11 @@ protected:
//
virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; }
/// Emits \p Callee function call with arguments \p Args with location \p Loc.
void emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
ArrayRef<llvm::Value *> Args = llvm::None,
SourceLocation Loc = SourceLocation()) const;
private:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
@ -1345,7 +1350,8 @@ public:
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
virtual void
emitOutlinedFunctionCall(CodeGenFunction &CGF, llvm::Value *OutlinedFn,
emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args = llvm::None) const;
};

View File

@ -150,20 +150,18 @@ enum NamedBarrier : unsigned {
/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
return CGF.EmitRuntimeCall(
llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
llvm::None, "nvptx_warp_size");
"nvptx_warp_size");
}
/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
return CGF.EmitRuntimeCall(
llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
llvm::None, "nvptx_tid");
"nvptx_tid");
}
/// Get the id of the warp in the block.
@ -185,17 +183,15 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
return CGF.EmitRuntimeCall(
llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
llvm::None, "nvptx_num_threads");
"nvptx_num_threads");
}
/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
@ -205,9 +201,9 @@ static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
llvm::Value *NumThreads) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(),
llvm::Intrinsic::nvvm_barrier),
Args);
CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier),
Args);
}
/// Synchronize all GPU threads in a block.
@ -345,7 +341,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
CGF.EmitBlock(WorkerBB);
emitOutlinedFunctionCall(CGF, WST.WorkerFn);
emitCall(CGF, WST.WorkerFn);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
@ -555,7 +551,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
emitOutlinedFunctionCall(CGF, Fn, FnArgs);
emitCall(CGF, Fn, FnArgs);
// Go to end of parallel region.
CGF.EmitBranch(TerminateBB);
@ -883,7 +879,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
void CGOpenMPRuntimeNVPTX::emitParallelCall(
@ -932,10 +928,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
auto *ThreadID = getThreadID(CGF, Loc);
llvm::Value *Args[] = {RTLoc, ThreadID};
auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
PrePostActionTy &Action) {
auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
@ -944,7 +940,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
OutlinedFnArgs.push_back(
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Fn, OutlinedFnArgs);
emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
};
RegionCodeGenTy RCG(CodeGen);
@ -980,7 +976,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
OutlinedFnArgs.push_back(
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
/// This function creates calls to one of two shuffle functions to copy
@ -2296,7 +2292,7 @@ CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF,
}
void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args) const {
SmallVector<llvm::Value *, 4> TargetArgs;
auto *FnType =
@ -2314,5 +2310,5 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
TargetArgs.emplace_back(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
}
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs);
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
}

View File

@ -285,7 +285,7 @@ public:
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
void emitOutlinedFunctionCall(
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> Args = llvm::None) const override;
/// Target codegen is specialized based on two programming models: the

View File

@ -473,7 +473,6 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
/*RegisterCastedArgsOnly=*/true,
CapturedStmtInfo->getHelperName());
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
WrapperCGF.disableDebugInfo();
Args.clear();
LocalAddrs.clear();
VLASizes.clear();
@ -501,7 +500,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
}
CallArgs.emplace_back(CallArg);
}
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, F, CallArgs);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
F, CallArgs);
WrapperCGF.FinishFunction();
return WrapperF;
}
@ -2749,6 +2749,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
OMPPrivateScope Scope(CGF);
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty()) {
enum { PrivatesParam = 2, CopyFnParam = 3 };
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
@ -2780,7 +2781,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.EmitRuntimeCall(CopyFn, CallArgs);
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
CopyFn, CallArgs);
for (auto &&Pair : LastprivateDstsOrigs) {
auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
DeclRefExpr DRE(
@ -3170,8 +3172,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, OutlinedFn,
CapturedVars);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
OutlinedFn, CapturedVars);
} else {
Action.Enter(CGF);
CGF.EmitStmt(

View File

@ -122,7 +122,7 @@ int main (int argc, char **argv) {
// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc)
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg
// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind