[OPENMP] Generalization of calls of the outlined functions.
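General improvement of outlined function calls: emitOutlinedFunctionCall now takes a SourceLocation and forwards to a new shared emitCall helper, which applies an artificial debug location before emitting the call. The NVPTX intrinsic calls are emitted with CGF.EmitRuntimeCall instead of Builder.CreateCall. llvm-svn: 310840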
This commit is contained in:
parent
e1dde07640
commit
3c595a6b2c
|
@ -2447,7 +2447,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|||
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
RT.emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
|
||||
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||
|
||||
// __kmpc_end_serialized_parallel(&Loc, GTid);
|
||||
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
|
||||
|
@ -3348,14 +3348,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
|
|||
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
|
||||
CGM, ".omp_offloading.descriptor_unreg",
|
||||
[&](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
|
||||
Desc);
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
|
||||
Desc);
|
||||
});
|
||||
auto *RegFn = createOffloadingBinaryDescriptorFunction(
|
||||
CGM, ".omp_offloading.descriptor_reg",
|
||||
[&](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
|
||||
Desc);
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib),
|
||||
Desc);
|
||||
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
|
||||
});
|
||||
if (CGM.supportsCOMDAT()) {
|
||||
|
@ -3859,7 +3859,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|||
}
|
||||
CallArgs.push_back(SharedsParam);
|
||||
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskFunction, CallArgs);
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
|
||||
CallArgs);
|
||||
CGF.EmitStoreThroughLValue(
|
||||
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
|
||||
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
||||
|
@ -4534,8 +4535,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|||
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
||||
}
|
||||
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
|
||||
NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
NumDependencies, &DepWaitTaskArgs,
|
||||
Loc](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
auto &RT = CGF.CGM.getOpenMPRuntime();
|
||||
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
|
||||
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
||||
|
@ -4546,11 +4547,11 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|||
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
|
||||
DepWaitTaskArgs);
|
||||
// Call proxy_task_entry(gtid, new_task);
|
||||
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
|
||||
CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
|
||||
Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
|
||||
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, TaskEntry,
|
||||
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
|
||||
OutlinedFnArgs);
|
||||
};
|
||||
|
||||
|
@ -7035,7 +7036,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
|||
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
|
||||
|
||||
CGF.EmitBlock(OffloadFailedBlock);
|
||||
emitOutlinedFunctionCall(CGF, OutlinedFn, KernelArgs);
|
||||
emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
|
||||
CGF.EmitBranch(OffloadContBlock);
|
||||
|
||||
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
|
||||
|
@ -7755,16 +7756,25 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
|
|||
CGF.EmitRuntimeCall(RTLFn, Args);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args) const {
|
||||
if (auto *Fn = dyn_cast<llvm::Function>(OutlinedFn)) {
|
||||
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
|
||||
ArrayRef<llvm::Value *> Args,
|
||||
SourceLocation Loc) const {
|
||||
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
|
||||
|
||||
if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
|
||||
if (Fn->doesNotThrow()) {
|
||||
CGF.EmitNounwindRuntimeCall(OutlinedFn, Args);
|
||||
CGF.EmitNounwindRuntimeCall(Fn, Args);
|
||||
return;
|
||||
}
|
||||
}
|
||||
CGF.EmitRuntimeCall(OutlinedFn, Args);
|
||||
CGF.EmitRuntimeCall(Callee, Args);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args) const {
|
||||
assert(Loc.isValid() && "Outlined function call location must be valid.");
|
||||
emitCall(CGF, OutlinedFn, Args, Loc);
|
||||
}
|
||||
|
||||
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
|
||||
|
|
|
@ -250,6 +250,11 @@ protected:
|
|||
//
|
||||
virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; }
|
||||
|
||||
/// Emits \p Callee function call with arguments \p Args with location \p Loc.
|
||||
void emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
|
||||
ArrayRef<llvm::Value *> Args = llvm::None,
|
||||
SourceLocation Loc = SourceLocation()) const;
|
||||
|
||||
private:
|
||||
/// \brief Default const ident_t object used for initialization of all other
|
||||
/// ident_t objects.
|
||||
|
@ -1345,7 +1350,8 @@ public:
|
|||
/// Emits call of the outlined function with the provided arguments,
|
||||
/// translating these arguments to correct target-specific arguments.
|
||||
virtual void
|
||||
emitOutlinedFunctionCall(CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc,
|
||||
llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args = llvm::None) const;
|
||||
};
|
||||
|
||||
|
|
|
@ -150,20 +150,18 @@ enum NamedBarrier : unsigned {
|
|||
|
||||
/// Get the GPU warp size.
|
||||
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
return Bld.CreateCall(
|
||||
return CGF.EmitRuntimeCall(
|
||||
llvm::Intrinsic::getDeclaration(
|
||||
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
|
||||
llvm::None, "nvptx_warp_size");
|
||||
"nvptx_warp_size");
|
||||
}
|
||||
|
||||
/// Get the id of the current thread on the GPU.
|
||||
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
return Bld.CreateCall(
|
||||
return CGF.EmitRuntimeCall(
|
||||
llvm::Intrinsic::getDeclaration(
|
||||
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
|
||||
llvm::None, "nvptx_tid");
|
||||
"nvptx_tid");
|
||||
}
|
||||
|
||||
/// Get the id of the warp in the block.
|
||||
|
@ -185,17 +183,15 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
|
|||
|
||||
/// Get the maximum number of threads in a block of the GPU.
|
||||
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
return Bld.CreateCall(
|
||||
return CGF.EmitRuntimeCall(
|
||||
llvm::Intrinsic::getDeclaration(
|
||||
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
|
||||
llvm::None, "nvptx_num_threads");
|
||||
"nvptx_num_threads");
|
||||
}
|
||||
|
||||
/// Get barrier to synchronize all threads in a block.
|
||||
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
|
||||
CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
|
||||
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
|
||||
}
|
||||
|
||||
|
@ -205,9 +201,9 @@ static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
|
|||
llvm::Value *NumThreads) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
|
||||
Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(),
|
||||
llvm::Intrinsic::nvvm_barrier),
|
||||
Args);
|
||||
CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
|
||||
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier),
|
||||
Args);
|
||||
}
|
||||
|
||||
/// Synchronize all GPU threads in a block.
|
||||
|
@ -345,7 +341,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
|
|||
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
|
||||
|
||||
CGF.EmitBlock(WorkerBB);
|
||||
emitOutlinedFunctionCall(CGF, WST.WorkerFn);
|
||||
emitCall(CGF, WST.WorkerFn);
|
||||
CGF.EmitBranch(EST.ExitBB);
|
||||
|
||||
CGF.EmitBlock(MasterCheckBB);
|
||||
|
@ -555,7 +551,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
|
|||
CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
|
||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
|
||||
llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
|
||||
emitOutlinedFunctionCall(CGF, Fn, FnArgs);
|
||||
emitCall(CGF, Fn, FnArgs);
|
||||
|
||||
// Go to end of parallel region.
|
||||
CGF.EmitBranch(TerminateBB);
|
||||
|
@ -883,7 +879,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
|
|||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
|
||||
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::emitParallelCall(
|
||||
|
@ -932,10 +928,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
|
|||
auto *ThreadID = getThreadID(CGF, Loc);
|
||||
llvm::Value *Args[] = {RTLoc, ThreadID};
|
||||
|
||||
auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
|
||||
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
||||
|
@ -944,7 +940,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
|
|||
OutlinedFnArgs.push_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, Fn, OutlinedFnArgs);
|
||||
emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
|
||||
};
|
||||
|
||||
RegionCodeGenTy RCG(CodeGen);
|
||||
|
@ -980,7 +976,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
|
|||
OutlinedFnArgs.push_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, OutlinedFn, OutlinedFnArgs);
|
||||
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||
}
|
||||
|
||||
/// This function creates calls to one of two shuffle functions to copy
|
||||
|
@ -2296,7 +2292,7 @@ CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF,
|
|||
}
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args) const {
|
||||
SmallVector<llvm::Value *, 4> TargetArgs;
|
||||
auto *FnType =
|
||||
|
@ -2314,5 +2310,5 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
|
|||
TargetArgs.emplace_back(
|
||||
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
|
||||
}
|
||||
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs);
|
||||
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
|
||||
}
|
||||
|
|
|
@ -285,7 +285,7 @@ public:
|
|||
/// Emits call of the outlined function with the provided arguments,
|
||||
/// translating these arguments to correct target-specific arguments.
|
||||
void emitOutlinedFunctionCall(
|
||||
CodeGenFunction &CGF, llvm::Value *OutlinedFn,
|
||||
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> Args = llvm::None) const override;
|
||||
|
||||
/// Target codegen is specialized based on two programming models: the
|
||||
|
|
|
@ -473,7 +473,6 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
|
|||
/*RegisterCastedArgsOnly=*/true,
|
||||
CapturedStmtInfo->getHelperName());
|
||||
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
|
||||
WrapperCGF.disableDebugInfo();
|
||||
Args.clear();
|
||||
LocalAddrs.clear();
|
||||
VLASizes.clear();
|
||||
|
@ -501,7 +500,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
|
|||
}
|
||||
CallArgs.emplace_back(CallArg);
|
||||
}
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, F, CallArgs);
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
|
||||
F, CallArgs);
|
||||
WrapperCGF.FinishFunction();
|
||||
return WrapperF;
|
||||
}
|
||||
|
@ -2749,6 +2749,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
|
|||
OMPPrivateScope Scope(CGF);
|
||||
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
|
||||
!Data.LastprivateVars.empty()) {
|
||||
enum { PrivatesParam = 2, CopyFnParam = 3 };
|
||||
auto *CopyFn = CGF.Builder.CreateLoad(
|
||||
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
|
||||
auto *PrivatesPtr = CGF.Builder.CreateLoad(
|
||||
|
@ -2780,7 +2781,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
|
|||
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
|
||||
CallArgs.push_back(PrivatePtr.getPointer());
|
||||
}
|
||||
CGF.EmitRuntimeCall(CopyFn, CallArgs);
|
||||
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
|
||||
CopyFn, CallArgs);
|
||||
for (auto &&Pair : LastprivateDstsOrigs) {
|
||||
auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
|
||||
DeclRefExpr DRE(
|
||||
|
@ -3170,8 +3172,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
|
|||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
||||
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, OutlinedFn,
|
||||
CapturedVars);
|
||||
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
|
||||
OutlinedFn, CapturedVars);
|
||||
} else {
|
||||
Action.Enter(CGF);
|
||||
CGF.EmitStmt(
|
||||
|
|
|
@ -122,7 +122,7 @@ int main (int argc, char **argv) {
|
|||
// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
|
||||
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc)
|
||||
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc)
|
||||
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]
|
||||
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg
|
||||
|
||||
// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind
|
||||
// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
|
||||
|
|
Loading…
Reference in New Issue