[Profile] backward propagate profile info in JumpThreading

Take-2 after fixing bugs in the original patch.

Differential Revsion: http://reviews.llvm.org/D36864

llvm-svn: 311727
This commit is contained in:
Xinliang David Li 2017-08-24 22:54:01 +00:00
parent bb789381fc
commit 66531dd10a
3 changed files with 256 additions and 1 deletions

View File

@ -126,7 +126,116 @@ JumpThreadingPass::JumpThreadingPass(int T) {
BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
/// runOnFunction - Top level algorithm.
// Update branch probability information according to conditional
// branch probablity. This is usually made possible for cloned branches
// in inline instances by the context specific profile in the caller.
// For instance,
//
// [Block PredBB]
// [Branch PredBr]
// if (t) {
// Block A;
// } else {
// Block B;
// }
//
// [Block BB]
// cond = PN([true, %A], [..., %B]); // PHI node
// [Branch CondBr]
// if (cond) {
// ... // P(cond == true) = 1%
// }
//
// Here we know that when block A is taken, cond must be true, which means
// P(cond == true | A) = 1
//
// Given that P(cond == true) = P(cond == true | A) * P(A) +
// P(cond == true | B) * P(B)
// we get
// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
//
// which gives us:
// P(A) <= P(c == true), i.e.
// P(t == true) <= P(cond == true)
//
// In other words, if we know P(cond == true), we know that P(t == true)
// can not be greater than 1%.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
if (!CondBr)
return;
BranchProbability BP;
uint64_t TrueWeight, FalseWeight;
if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
return;
// Returns the outgoing edge of the dominating predecessor block
// that leads to the PhiNode's incoming block:
auto GetPredOutEdge =
[](BasicBlock *IncomingBB,
BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
auto *PredBB = IncomingBB;
auto *SuccBB = PhiBB;
for (;;) {
BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
if (PredBr && PredBr->isConditional())
return {PredBB, SuccBB};
auto *SinglePredBB = PredBB->getSinglePredecessor();
if (!SinglePredBB)
return {nullptr, nullptr};
SuccBB = PredBB;
PredBB = SinglePredBB;
}
};
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *PhiOpnd = PN->getIncomingValue(i);
ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
if (!CI || !CI->getType()->isIntegerTy(1))
continue;
BP = (CI->isOne() ? BranchProbability::getBranchProbability(
TrueWeight, TrueWeight + FalseWeight)
: BranchProbability::getBranchProbability(
FalseWeight, TrueWeight + FalseWeight));
auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
if (!PredOutEdge.first)
return;
BasicBlock *PredBB = PredOutEdge.first;
BranchInst *PredBr = cast<BranchInst>(PredBB->getTerminator());
uint64_t PredTrueWeight, PredFalseWeight;
// FIXME: We currently only set the profile data when it is missing.
// With PGO, this can be used to refine even existing profile data with
// context information. This needs to be done after more performance
// testing.
if (PredBr->extractProfMetadata(PredTrueWeight, PredFalseWeight))
continue;
// We can not infer anything useful when BP >= 50%, because BP is the
// upper bound probability value.
if (BP >= BranchProbability(50, 100))
continue;
SmallVector<uint32_t, 2> Weights;
if (PredBr->getSuccessor(0) == PredOutEdge.second) {
Weights.push_back(BP.getNumerator());
Weights.push_back(BP.getCompl().getNumerator());
} else {
Weights.push_back(BP.getCompl().getNumerator());
Weights.push_back(BP.getNumerator());
}
PredBr->setMetadata(LLVMContext::MD_prof,
MDBuilder(PredBr->getParent()->getContext())
.createBranchWeights(Weights));
}
}
/// runOnFunction - Toplevel algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
if (skipFunction(F))
@ -991,6 +1100,11 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (SimplifyPartiallyRedundantLoad(LI))
return true;
// Before threading, try to propagate profile data backwards:
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
updatePredecessorProfileMetadata(PN, BB);
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.

View File

@ -0,0 +1,99 @@
; RUN: opt -jump-threading -S < %s | FileCheck %s
; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
define void @test() {
; CHECK-LABEL: @test()
bb:
%tmp = call i32 @a()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5, label %bb2
; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
bb2: ; preds = %bb
%tmp3 = call i32 @b()
%tmp4 = icmp ne i32 %tmp3, 1
br label %bb5
; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
bb5: ; preds = %bb2, %bb
%tmp6 = phi i1 [ false, %bb ], [ %tmp4, %bb2 ]
br i1 %tmp6, label %bb8, label %bb7, !prof !0
bb7: ; preds = %bb5
call void @bar()
br label %bb8
bb8: ; preds = %bb7, %bb5
ret void
}
define void @test_single_pred1() {
; CHECK-LABEL: @test_single_pred1()
bb:
%tmp = call i32 @a()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5_1, label %bb2
; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
bb5_1:
br label %bb5;
bb2:
%tmp3 = call i32 @b()
%tmp4 = icmp ne i32 %tmp3, 1
br label %bb5
; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
bb5:
%tmp6 = phi i1 [ false, %bb5_1 ], [ %tmp4, %bb2 ]
br i1 %tmp6, label %bb8, label %bb7, !prof !0
bb7:
call void @bar()
br label %bb8
bb8:
ret void
}
define void @test_single_pred2() {
; CHECK-LABEL: @test_single_pred2()
bb:
%tmp = call i32 @a()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5_1, label %bb2
; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
bb5_1:
br label %bb5_2;
bb5_2:
br label %bb5;
bb2:
%tmp3 = call i32 @b()
%tmp4 = icmp ne i32 %tmp3, 1
br label %bb5
; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
bb5:
%tmp6 = phi i1 [ false, %bb5_2 ], [ %tmp4, %bb2 ]
br i1 %tmp6, label %bb8, label %bb7, !prof !0
bb7:
call void @bar()
br label %bb8
bb8:
ret void
}
declare void @bar()
declare i32 @a()
declare i32 @b()
!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}

View File

@ -0,0 +1,42 @@
; RUN: opt -jump-threading -S < %s | FileCheck %s
; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
define void @test() {
bb:
%tmp = call i32 @a()
%tmp1 = icmp eq i32 %tmp, 1
br i1 %tmp1, label %bb5, label %bb2
; CHECK: br i1 %tmp1,{{.*}} !prof ![[PROF1:[0-9]+]]
bb2:
%tmp3 = call i32 @b()
%tmp4 = icmp ne i32 %tmp3, 1
br label %bb5
; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF2:[0-9]+]]
bb5:
%tmp6 = phi i1 [ false, %bb ], [ %tmp4, %bb2 ]
br i1 %tmp6, label %bb8, label %bb7, !prof !0
bb7:
call void @bar()
br label %bb9
bb8:
call void @foo()
br label %bb9
bb9:
ret void
}
declare void @bar()
declare void @foo()
declare i32 @a()
declare i32 @b()
!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
;CHECK: ![[PROF1]] = !{!"branch_weights", i32 1073205, i32 2146410443}
;CHECK: ![[PROF2]] = !{!"branch_weights", i32 2146410443, i32 1073205}