[PGO] Handle cases of failing to split critical edges

Fix PR41279 where critical edges to EHPad are not split.
The fix is to not instrument those critical edges. We used to be able to know
the size of counters right after MST is computed. With this, we have to
pre-collect the instrument BBs to know the size, and then instrument them.

Differential Revision: https://reviews.llvm.org/D62439

llvm-svn: 361882
This commit is contained in:
Rong Xu 2019-05-28 21:45:56 +00:00
parent 5b32f60ec3
commit e88173abc0
2 changed files with 123 additions and 44 deletions

View File

@ -573,6 +573,10 @@ public:
// The Minimum Spanning Tree of function CFG.
CFGMST<Edge, BBInfo> MST;
// Collect all the BBs that will be instrumented, and store them in
// InstrumentBBs.
void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
// Give an edge, find the BB that will be instrumented.
// Return nullptr if there is no BB to be instrumented.
BasicBlock *getInstrBB(Edge *E);
@ -629,16 +633,6 @@ public:
if (CreateGlobalVar)
FuncNameVar = createPGOFuncNameVar(F, FuncName);
}
// Return the number of profile counters needed for the function.
unsigned getNumCounters() {
unsigned NumCounters = 0;
for (auto &E : this->MST.AllEdges) {
if (!E->InMST && !E->Removed)
NumCounters++;
}
return NumCounters + SIVisitor.getNumOfSelectInsts();
}
};
} // end anonymous namespace
@ -753,6 +747,24 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
}
// Collect all the BBs that will be instruments and return them in
// InstrumentBBs.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
std::vector<BasicBlock *> &InstrumentBBs) {
// Use a worklist as we will update the vector during the iteration.
std::vector<Edge *> EdgeList;
EdgeList.reserve(MST.AllEdges.size());
for (auto &E : MST.AllEdges)
EdgeList.push_back(E.get());
for (auto &E : EdgeList) {
BasicBlock *InstrBB = getInstrBB(E);
if (InstrBB)
InstrumentBBs.push_back(InstrBB);
}
}
// Given a CFG E to be instrumented, find which BB to place the instrumented
// code. The function will split the critical edge if necessary.
template <class Edge, class BBInfo>
@ -783,9 +795,18 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
<< " --> " << getBBInfo(DestBB).Index << "\n");
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
assert(InstrBB && "Critical edge is not split");
if (!InstrBB) {
LLVM_DEBUG(
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
return nullptr;
}
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
MST.addEdge(SrcBB, InstrBB, 0);
// Second one: Add new edge of InstrBB->DestBB.
Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
NewEdge1.InMST = true;
E->Removed = true;
return InstrBB;
}
@ -801,15 +822,14 @@ static void instrumentOneFunc(
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
BFI, IsCS);
unsigned NumCounters = FuncInfo.getNumCounters();
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto &E : FuncInfo.MST.AllEdges) {
BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
if (!InstrBB)
continue;
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
@ -1039,39 +1059,31 @@ private:
// edges and the BB. Return false on error.
bool PGOUseFunc::setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile) {
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
// The number of counters here should match the number of counters
// in profile. Return if they mismatch.
if (FuncInfo.getNumCounters() != CountFromProfile.size()) {
if (NumCounters != CountFromProfile.size()) {
return false;
}
// Use a worklist as we will update the vector during the iteration.
std::vector<PGOUseEdge *> WorkList;
for (auto &E : FuncInfo.MST.AllEdges)
WorkList.push_back(E.get());
uint32_t I = 0;
for (auto &E : WorkList) {
BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
if (!InstrBB)
continue;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
if (!E->Removed) {
getBBInfo(InstrBB).setBBInfoCount(CountValue);
E->setEdgeCount(CountValue);
continue;
UseBBInfo &Info = getBBInfo(InstrBB);
Info.setBBInfoCount(CountValue);
// If only one in-edge, the edge profile count should be the same as BB
// profile count.
if (Info.InEdges.size() == 1) {
Info.InEdges[0]->setEdgeCount(CountValue);
}
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
if (Info.OutEdges.size() == 1) {
Info.OutEdges[0]->setEdgeCount(CountValue);
}
// Need to add two new edges.
BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
// Add new edge of SrcBB->InstrBB.
PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
NewEdge.setEdgeCount(CountValue);
// Add new edge of InstrBB->DestBB.
PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
NewEdge1.setEdgeCount(CountValue);
NewEdge1.InMST = true;
getBBInfo(InstrBB).setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
CountPosition = I;

View File

@ -0,0 +1,67 @@
; Test that instrumentaiton works fine for the case of failing the split critical edges.
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
declare { i8*, i64 } @f0({ i8*, i64 }*)
declare i64 @f1()
declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
declare i32 @__CxxFrameHandler3(...)
define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
%3 = alloca i8, align 1
store i8 0, i8* %3, align 1
%4 = call i64 @f1()
%5 = icmp ult i64 %4, 32
br i1 %5, label %7, label %13
6:
cleanupret from %17 unwind to caller
; GEN: 6:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
7:
store i8 1, i8* %3, align 1
%8 = call { i8*, i64 } @f0({ i8*, i64 }* %0)
%9 = extractvalue { i8*, i64 } %8, 0
%10 = extractvalue { i8*, i64 } %8, 1
invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1)
to label %11 unwind label %16
; GEN: 7:
; GEN-NOT: call void @llvm.instrprof.increment
11:
store i8 0, i8* %3, align 1
invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10)
to label %12 unwind label %16
; GEN: 11:
; GEN-NOT: call void @llvm.instrprof.increment
12:
store i8 0, i8* %3, align 1
br label %14
; GEN: 12:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
13:
call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
br label %14
; GEN: 13:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
14:
ret void
15:
store i8 0, i8* %3, align 1
br label %6
; GEN: 15:
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
16:
%17 = cleanuppad within none []
%18 = load i8, i8* %3, align 1
%19 = trunc i8 %18 to i1
br i1 %19, label %15, label %6
}