[ValueTracking] Remove dead code from an old experiment

This experiment was originally about trying to use facts implied dominating conditions to infer more precise known bits.  While the compile time was found to be acceptable on several large code bases, we never found sufficiently profitable examples to justify turning on the code by default.  Given this, it's time to abandon the experiment.  

Several folks have commented that they've found this useful for experimentation, but nothing has come of those experiments.  Given how easy the patch is to apply, there's no reason to leave the code in tree.  

For anyone interested in further investigation in this area, I recommend finding the summary email I sent on one of the original review threads.  In particular, I now believe the use-list based approach is strictly worse than the dom-tree-walking approach.  

llvm-svn: 262646
This commit is contained in:
Philip Reames 2016-03-03 19:44:06 +00:00
parent 9bba75084b
commit 146307eb52
5 changed files with 2 additions and 455 deletions

View File

@ -45,25 +45,6 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
/// Enable an experimental feature to leverage information about dominating
/// conditions to compute known bits. The individual options below control how
/// hard we search. The defaults are chosen to be fairly aggressive. If you
/// run into compile time problems when testing, scale them back and report
/// your findings.
static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
cl::Hidden, cl::init(false));
// This is expensive, so we only do it for the top level query value.
// (TODO: evaluate cost vs profit, consider higher thresholds)
static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
cl::Hidden, cl::init(1));
/// How many dominating blocks should be scanned looking for dominating
/// conditions?
static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
cl::Hidden,
cl::init(20));
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
@ -546,187 +527,6 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
/// true (at the context instruction.) This is mostly a utility function for
/// the prototype dominating conditions reasoning below.
static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
APInt &KnownZero,
APInt &KnownOne,
unsigned Depth, const Query &Q) {
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
// TODO: We could potentially be more aggressive here. This would be worth
// evaluating. If we can, explore commoning this code with the assume
// handling logic.
if (LHS != V && RHS != V)
return;
const unsigned BitWidth = KnownZero.getBitWidth();
switch (Cmp->getPredicate()) {
default:
// We know nothing from this condition
break;
// TODO: implement unsigned bound from below (known one bits)
// TODO: common condition check implementations with assumes
// TODO: implement other patterns from assume (e.g. V & B == A)
case ICmpInst::ICMP_SGT:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
// We know that the sign bit is zero.
KnownZero |= APInt::getSignBit(BitWidth);
}
}
break;
case ICmpInst::ICMP_EQ:
{
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
if (LHS == V)
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
else if (RHS == V)
computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
else
llvm_unreachable("missing use?");
KnownZero |= KnownZeroTemp;
KnownOne |= KnownOneTemp;
}
break;
case ICmpInst::ICMP_ULE:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
// The known zero bits carry over
unsigned SignBits = KnownZeroTemp.countLeadingOnes();
KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
}
break;
case ICmpInst::ICMP_ULT:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
// Whatever high bits in rhs are zero are known to be zero (if rhs is a
// power of 2, then one more).
unsigned SignBits = KnownZeroTemp.countLeadingOnes();
if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp)))
SignBits++;
KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
}
break;
};
}
/// Compute known bits in 'V' from conditions which are known to be true along
/// all paths leading to the context instruction. In particular, look for
/// cases where one branch of an interesting condition dominates the context
/// instruction. This does not do general dataflow.
/// NOTE: This code is EXPERIMENTAL and currently off by default.
static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
APInt &KnownOne,
unsigned Depth,
const Query &Q) {
// Need both the dominator tree and the query location to do anything useful
if (!Q.DT || !Q.CxtI)
return;
Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
// The context instruction might be in a statically unreachable block. If
// so, asking dominator queries may yield suprising results. (e.g. the block
// may not have a dom tree node)
if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
return;
// Avoid useless work
if (auto VI = dyn_cast<Instruction>(V))
if (VI->getParent() == Cxt->getParent())
return;
// Note: We currently implement two options. It's not clear which of these
// will survive long term, we need data for that.
// Option 1 - Try walking the dominator tree looking for conditions which
// might apply. This works well for local conditions (loop guards, etc..),
// but not as well for things far from the context instruction (presuming a
// low max blocks explored). If we can set an high enough limit, this would
// be all we need.
// Option 2 - We restrict out search to those conditions which are uses of
// the value we're interested in. This is independent of dom structure,
// but is slightly less powerful without looking through lots of use chains.
// It does handle conditions far from the context instruction (e.g. early
// function exits on entry) really well though.
// Option 1 - Search the dom tree
unsigned NumBlocksExplored = 0;
BasicBlock *Current = Cxt->getParent();
while (true) {
// Stop searching if we've gone too far up the chain
if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
break;
NumBlocksExplored++;
if (!Q.DT->getNode(Current)->getIDom())
break;
Current = Q.DT->getNode(Current)->getIDom()->getBlock();
if (!Current)
// found function entry
break;
BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
if (!BI || BI->isUnconditional())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cmp)
continue;
// We're looking for conditions that are guaranteed to hold at the context
// instruction. Finding a condition where one path dominates the context
// isn't enough because both the true and false cases could merge before
// the context instruction we're actually interested in. Instead, we need
// to ensure that the taken *edge* dominates the context instruction. We
// know that the edge must be reachable since we started from a reachable
// block.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
continue;
computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
}
// Option 2 - Search the other uses of V
unsigned NumUsesExplored = 0;
for (auto U : V->users()) {
// Avoid massive lists
if (NumUsesExplored >= DomConditionsMaxUses)
break;
NumUsesExplored++;
// Consider only compare instructions uniquely controlling a branch
ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
if (!Cmp)
continue;
if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
continue;
for (auto *CmpU : Cmp->users()) {
BranchInst *BI = dyn_cast<BranchInst>(CmpU);
if (!BI || BI->isUnconditional())
continue;
// We're looking for conditions that are guaranteed to hold at the
// context instruction. Finding a condition where one path dominates
// the context isn't enough because both the true and false cases could
// merge before the context instruction we're actually interested in.
// Instead, we need to ensure that the taken *edge* dominates the context
// instruction.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
continue;
computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
}
}
}
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
@ -1657,18 +1457,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
}
// computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
// strictly refines KnownZero and KnownOne. Therefore, we run them after
// computeKnownBitsFromOperator.
// computeKnownBitsFromAssume strictly refines KnownZero and
// KnownOne. Therefore, we run them after computeKnownBitsFromOperator.
// Check whether a nearby assume intrinsic can determine some known bits.
computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q);
// Check whether there's a dominating condition which implies something about
// this value at the given context.
if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, Depth, Q);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}

View File

@ -1,18 +0,0 @@
; RUN: opt < %s -instcombine -value-tracking-dom-conditions -S | FileCheck %s
define i32 @dom_cond(i32 %a, i32 %b) {
; CHECK-LABEL: @dom_cond(
entry:
%v = add i32 %a, %b
%cond = icmp ule i32 %v, 7
br i1 %cond, label %then, label %exit
then:
%v2 = add i32 %v, 8
; CHECK: or i32 %v, 8
br label %exit
exit:
%v3 = phi i32 [ %v, %entry ], [ %v2, %then ]
ret i32 %v3
}

View File

@ -1,44 +0,0 @@
; RUN: opt -S %s -value-tracking-dom-conditions -licm -load-combine | FileCheck %s
; In pr24866.ll, we saw a crash when accessing a nullptr returned when
; asking for a dominator tree Node. This reproducer is really fragile,
; but it's currently the best we have.
%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183 = type { [256 x i32], [256 x i8] }
; Function Attrs: nounwind uwtable
define void @encode_one_blockX2(%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* nocapture readonly %actbl) #0 {
; CHECK-LABEL: @encode_one_blockX2
entry:
br i1 false, label %L_KLOOP_01, label %L_KLOOP.preheader
L_KLOOP_01: ; preds = %while.end, %entry
br label %L_KLOOP.preheader
L_KLOOP_08: ; preds = %while.end
br label %L_KLOOP.preheader
L_KLOOP.preheader: ; preds = %L_KLOOP_08, %L_KLOOP_01, %entry
%r.2.ph = phi i32 [ undef, %L_KLOOP_08 ], [ 0, %entry ], [ undef, %L_KLOOP_01 ]
br label %L_KLOOP
L_KLOOP: ; preds = %while.end, %L_KLOOP.preheader
%r.2 = phi i32 [ 0, %while.end ], [ %r.2.ph, %L_KLOOP.preheader ]
br i1 true, label %while.body, label %while.end
while.body: ; preds = %while.body, %L_KLOOP
br label %while.body
while.end: ; preds = %L_KLOOP
%shl105 = shl i32 %r.2, 4
%add106 = add nsw i32 %shl105, undef
%idxprom107 = sext i32 %add106 to i64
%arrayidx108 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 0, i64 %idxprom107
%0 = load i32, i32* %arrayidx108, align 4
%arrayidx110 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 1, i64 %idxprom107
%1 = load i8, i8* %arrayidx110, align 1
indirectbr i8* undef, [label %L_KLOOP_DONE, label %L_KLOOP_01, label %L_KLOOP_08, label %L_KLOOP]
L_KLOOP_DONE: ; preds = %while.end
ret void
}

View File

@ -1,152 +0,0 @@
; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define i1 @test_cmp_ult(i64 %A) {
; CHECK-LABEL: @test_cmp_ult
entry:
%cmp = icmp ult i64 %A, 64
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 false
%cmp2 = icmp ugt i64 %A, 64
ret i1 %cmp2
untaken:
ret i1 true
}
define i1 @test_cmp_ule(i64 %A) {
; CHECK-LABEL: @test_cmp_ule
entry:
%cmp = icmp ule i64 %A, 64
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 false
%cmp2 = icmp ugt i64 %A, 128
ret i1 %cmp2
untaken:
ret i1 true
}
define i1 @test_cmp_sgt(i32 %A) {
; CHECK-LABEL: @test_cmp_sgt
entry:
%cmp = icmp sgt i32 %A, 10
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 true
%cmp2 = icmp sgt i32 %A, -1
ret i1 %cmp2
untaken:
ret i1 true
}
define i64 @test_add_zero_bits(i64 %A) {
; CHECK-LABEL: @test_add_zero_bits
entry:
%cmp = icmp eq i64 %A, 2
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i64 3
%add = add i64 %A, 1
ret i64 %add
untaken:
ret i64 %A
}
define i64 @test_add_nsw(i64 %A) {
; CHECK-LABEL: @test_add_nsw
entry:
%cmp = icmp ult i64 %A, 20
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
; CHECK-NEXT: ret i64 %add
%add = add i64 %A, 1
ret i64 %add
untaken:
ret i64 %A
}
; After sinking the instructions into the if block, check that we
; can simplify some of them using dominating conditions.
define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
; CHECK-LABEL: @test_add_zero_bits_sink(
; CHECK-NOT: sdiv i32
entry:
%a = add nsw i32 %x, 16
%b = sdiv i32 %a, %x
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %bb1, label %bb2
bb1:
; CHECK-LABEL: bb1:
; CHECK-NEXT: or i32 %x, 16
; CHECK-NEXT: udiv i32
ret i32 %b
bb2:
ret i32 %x
}
; A condition in the same block gives no information
define i32 @test_neg1(i32 %x) nounwind ssp {
; CHECK-LABEL: @test_neg1
; CHECK: add
; CHECK: sdiv
; CHECK: icmp
; CHECK: select
entry:
%a = add nsw i32 %x, 16
%b = sdiv i32 %a, %x
%cmp = icmp ult i32 %x, 7
%ret = select i1 %cmp, i32 %a, i32 %b
ret i32 %ret
}
; A non-dominating edge gives no information
define i32 @test_neg2(i32 %x) {
; CHECK-LABEL: @test_neg2
entry:
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %bb1, label %merge
bb1:
br label %merge
merge:
; CHECK-LABEL: merge:
; CHECK: icmp
; CHECK: select
%cmp2 = icmp ult i32 %x, 7
%ret = select i1 %cmp2, i32 %x, i32 0
ret i32 %ret
}
; A unconditional branch expressed as a condition one gives no
; information (and shouldn't trip any asserts.)
define i32 @test_neg3(i32 %x) {
; CHECK-LABEL: @test_neg3
entry:
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %merge, label %merge
merge:
; CHECK-LABEL: merge:
; CHECK: icmp
; CHECK: select
%cmp2 = icmp ult i32 %x, 7
%ret = select i1 %cmp2, i32 %x, i32 0
ret i32 %ret
}
declare i32 @bar()

View File

@ -1,33 +0,0 @@
; RUN: opt < %s -separate-const-offset-from-gep -value-tracking-dom-conditions -reassociate-geps-verify-no-dead-code -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "nvptx64-unknown-unknown"
; if (i == 4)
; p = &input[i | 3];
;
; =>
;
; if (i == 4) {
; base = &input[i];
; p = &base[3];
; }
;
; We should treat (i | 3) as (i + 3) because i is guaranteed to be 4, which
; does not share any set bits with 3.
define float* @guarded_or(float* %input, i64 %i) {
; CHECK-LABEL: @guarded_or(
entry:
%is4 = icmp eq i64 %i, 4
br i1 %is4, label %then, label %exit
then:
%or = or i64 %i, 3
%p = getelementptr inbounds float, float* %input, i64 %or
; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
; CHECK: getelementptr inbounds float, float* [[base]], i64 3
ret float* %p
exit:
ret float* null
}