Handle Invoke during sample profiler annotation: make it inlinable.
Summary: Previously we relied on instcombine to remove inlinable invoke instructions. This caused trouble because a few extra optimizations were scheduled early, which could introduce too much CFG change (e.g. simplifycfg removes too much control flow). This patch handles invoke instructions in place during sample profile annotation, so that we no longer rely on instcombine to remove those invoke instructions. Reviewers: davidxl, dnovillo Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D24409 llvm-svn: 281870
This commit is contained in:
parent
e111710242
commit
41cde0b986
|
@ -112,7 +112,7 @@ protected:
|
||||||
bool emitAnnotations(Function &F);
|
bool emitAnnotations(Function &F);
|
||||||
ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
|
ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
|
||||||
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
|
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
|
||||||
const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
|
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
|
||||||
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
|
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
|
||||||
bool inlineHotFunctions(Function &F);
|
bool inlineHotFunctions(Function &F);
|
||||||
void printEdgeWeight(raw_ostream &OS, Edge E);
|
void printEdgeWeight(raw_ostream &OS, Edge E);
|
||||||
|
@ -210,6 +210,7 @@ public:
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
AU.addRequired<AssumptionCacheTracker>();
|
AU.addRequired<AssumptionCacheTracker>();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SampleProfileLoader SampleLoader;
|
SampleProfileLoader SampleLoader;
|
||||||
};
|
};
|
||||||
|
@ -466,11 +467,11 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
|
||||||
if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
|
if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
|
||||||
return std::error_code();
|
return std::error_code();
|
||||||
|
|
||||||
// If a call instruction is inlined in profile, but not inlined here,
|
// If a call/invoke instruction is inlined in profile, but not inlined here,
|
||||||
// it means that the inlined callsite has no sample, thus the call
|
// it means that the inlined callsite has no sample, thus the call
|
||||||
// instruction should have 0 count.
|
// instruction should have 0 count.
|
||||||
const CallInst *CI = dyn_cast<CallInst>(&Inst);
|
bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
|
||||||
if (CI && findCalleeFunctionSamples(*CI))
|
if (IsCall && findCalleeFunctionSamples(Inst))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
const DILocation *DIL = DLoc;
|
const DILocation *DIL = DLoc;
|
||||||
|
@ -513,9 +514,11 @@ SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
|
||||||
DenseMap<uint64_t, uint64_t> CM;
|
DenseMap<uint64_t, uint64_t> CM;
|
||||||
for (auto &I : BB->getInstList()) {
|
for (auto &I : BB->getInstList()) {
|
||||||
const ErrorOr<uint64_t> &R = getInstWeight(I);
|
const ErrorOr<uint64_t> &R = getInstWeight(I);
|
||||||
if (R) CM[R.get()]++;
|
if (R)
|
||||||
|
CM[R.get()]++;
|
||||||
}
|
}
|
||||||
if (CM.size() == 0) return std::error_code();
|
if (CM.size() == 0)
|
||||||
|
return std::error_code();
|
||||||
uint64_t W = 0, C = 0;
|
uint64_t W = 0, C = 0;
|
||||||
for (const auto &C_W : CM) {
|
for (const auto &C_W : CM) {
|
||||||
if (C_W.second == W) {
|
if (C_W.second == W) {
|
||||||
|
@ -552,18 +555,18 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
|
||||||
|
|
||||||
/// \brief Get the FunctionSamples for a call instruction.
|
/// \brief Get the FunctionSamples for a call instruction.
|
||||||
///
|
///
|
||||||
/// The FunctionSamples of a call instruction \p Inst is the inlined
|
/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
|
||||||
/// instance in which that call instruction is calling to. It contains
|
/// instance in which that call instruction is calling to. It contains
|
||||||
/// all samples that resides in the inlined instance. We first find the
|
/// all samples that resides in the inlined instance. We first find the
|
||||||
/// inlined instance in which the call instruction is from, then we
|
/// inlined instance in which the call instruction is from, then we
|
||||||
/// traverse its children to find the callsite with the matching
|
/// traverse its children to find the callsite with the matching
|
||||||
/// location and callee function name.
|
/// location.
|
||||||
///
|
///
|
||||||
/// \param Inst Call instruction to query.
|
/// \param Inst Call/Invoke instruction to query.
|
||||||
///
|
///
|
||||||
/// \returns The FunctionSamples pointer to the inlined instance.
|
/// \returns The FunctionSamples pointer to the inlined instance.
|
||||||
const FunctionSamples *
|
const FunctionSamples *
|
||||||
SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
|
SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
|
||||||
const DILocation *DIL = Inst.getDebugLoc();
|
const DILocation *DIL = Inst.getDebugLoc();
|
||||||
if (!DIL) {
|
if (!DIL) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -612,7 +615,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
|
||||||
return FS;
|
return FS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// \brief Iteratively inline hot callsites of a function.
|
/// \brief Iteratively inline hot callsites of a function.
|
||||||
///
|
///
|
||||||
/// Iteratively traverse all callsites of the function \p F, and find if
|
/// Iteratively traverse all callsites of the function \p F, and find if
|
||||||
|
@ -632,20 +634,27 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
|
||||||
Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
|
Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
|
||||||
while (true) {
|
while (true) {
|
||||||
bool LocalChanged = false;
|
bool LocalChanged = false;
|
||||||
SmallVector<CallInst *, 10> CIS;
|
SmallVector<Instruction *, 10> CIS;
|
||||||
for (auto &BB : F) {
|
for (auto &BB : F) {
|
||||||
for (auto &I : BB.getInstList()) {
|
for (auto &I : BB.getInstList()) {
|
||||||
CallInst *CI = dyn_cast<CallInst>(&I);
|
const FunctionSamples *FS = nullptr;
|
||||||
if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
|
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
|
||||||
CIS.push_back(CI);
|
(FS = findCalleeFunctionSamples(I))) {
|
||||||
|
|
||||||
|
if (callsiteIsHot(Samples, FS))
|
||||||
|
CIS.push_back(&I);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto CI : CIS) {
|
}
|
||||||
|
for (auto I : CIS) {
|
||||||
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
|
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
|
||||||
Function *CalledFunction = CI->getCalledFunction();
|
CallInst *CI = dyn_cast<CallInst>(I);
|
||||||
DebugLoc DLoc = CI->getDebugLoc();
|
InvokeInst *II = dyn_cast<InvokeInst>(I);
|
||||||
uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
|
Function *CalledFunction =
|
||||||
if (InlineFunction(CI, IFI)) {
|
(CI == nullptr ? II->getCalledFunction() : CI->getCalledFunction());
|
||||||
|
DebugLoc DLoc = I->getDebugLoc();
|
||||||
|
uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
|
||||||
|
if ((CI && InlineFunction(CI, IFI)) || (II && InlineFunction(II, IFI))) {
|
||||||
LocalChanged = true;
|
LocalChanged = true;
|
||||||
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
|
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
|
||||||
Twine("inlined hot callee '") +
|
Twine("inlined hot callee '") +
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
_Z3foov:200:100
|
||||||
|
1: _Z3barv:100
|
||||||
|
3: _Z3barv:100
|
|
@ -0,0 +1,47 @@
|
||||||
|
; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/einline.prof | FileCheck %s
|
||||||
|
|
||||||
|
; Checks if both call and invoke can be inlined early if their inlined
|
||||||
|
; instances are hot in profile.
|
||||||
|
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
@_ZTIi = external constant i8*
|
||||||
|
|
||||||
|
; Function Attrs: uwtable
|
||||||
|
define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !6 {
|
||||||
|
%1 = alloca i8*
|
||||||
|
%2 = alloca i32
|
||||||
|
%3 = alloca i32, align 4
|
||||||
|
; CHECK-NOT: call
|
||||||
|
call void @_ZL3barv(), !dbg !9
|
||||||
|
; CHECK-NOT: invoke
|
||||||
|
invoke void @_ZL3barv()
|
||||||
|
to label %4 unwind label %5, !dbg !10
|
||||||
|
|
||||||
|
; <label>:4:
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; <label>:5:
|
||||||
|
%6 = landingpad { i8*, i32 }
|
||||||
|
catch i8* bitcast (i8** @_ZTIi to i8*)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind uwtable
|
||||||
|
define internal void @_ZL3barv() #1 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @__gxx_personality_v0(...)
|
||||||
|
|
||||||
|
!llvm.dbg.cu = !{!0}
|
||||||
|
!llvm.module.flags = !{!3, !4}
|
||||||
|
|
||||||
|
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
|
||||||
|
!1 = !DIFile(filename: "a", directory: "b/")
|
||||||
|
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||||
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||||
|
!6 = distinct !DISubprogram(linkageName: "_Z3foov", scope: !1, line: 5, scopeLine: 5, unit: !0)
|
||||||
|
!9 = !DILocation(line: 6, column: 3, scope: !6)
|
||||||
|
!10 = !DILocation(line: 8, column: 5, scope: !11)
|
||||||
|
!11 = distinct !DILexicalBlock(scope: !6, file: !1, line: 7, column: 7)
|
Loading…
Reference in New Issue