Fix CountCodeReductionForAlloca to more accurately represent what SROA can and
can't handle. Also don't produce non-zero results for things which won't be
transformed by SROA at all just because we saw the loads/stores before we saw
the use of the address.

llvm-svn: 148536
Nick Lewycky 2012-01-20 08:35:20 +00:00
parent c908b43d9f
commit e8415fea4b
2 changed files with 104 additions and 16 deletions
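As a rough illustration (not part of the commit; the function and value names below are invented), the second sentence of the message describes cases like the following IR, where the load and store alone would have earned an inlining bonus even though the variable-index GEP keeps SROA from ever rewriting the alloca. The new @inner2 test below exercises the same pattern:

  define i32 @demo(i32 %idx) {
    %buf = alloca [4 x i32]
    ; These uses look like SROA wins in isolation...
    %p0 = getelementptr [4 x i32]* %buf, i32 0, i32 0
    store i32 7, i32* %p0
    %v = load i32* %p0
    ; ...but the variable index here means SROA can't break up %buf at all,
    ; so none of the uses above should count as a code-size reduction.
    %pv = getelementptr [4 x i32]* %buf, i32 0, i32 %idx
    store i32 %v, i32* %pv
    ret i32 %v
  }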


@@ -221,23 +221,67 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
  if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
  unsigned Reduction = 0;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    Instruction *I = cast<Instruction>(*UI);
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      Reduction += InlineConstants::InstrCost;
    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      // If the GEP has variable indices, we won't be able to do much with it.
      if (GEP->hasAllConstantIndices())
        Reduction += CountCodeReductionForAlloca(GEP);
    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
      // Track pointer through bitcasts.
      Reduction += CountCodeReductionForAlloca(BCI);
    } else {
      // If there is some other strange instruction, we're not going to be able
      // to do much if we inline this.
      return 0;
  SmallVector<Value *, 4> Worklist;
  Worklist.push_back(V);
  do {
    Value *V = Worklist.pop_back_val();
    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
         UI != E; ++UI){
      Instruction *I = cast<Instruction>(*UI);
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (!LI->isSimple())
          return 0;
        Reduction += InlineConstants::InstrCost;
      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (!SI->isSimple())
          return 0;
        Reduction += InlineConstants::InstrCost;
      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has variable indices, we won't be able to do much with it.
        if (!GEP->hasAllConstantIndices())
          return 0;
        // A non-zero GEP will likely become a mask operation after SROA.
        if (GEP->hasAllZeroIndices())
          Reduction += InlineConstants::InstrCost;
        Worklist.push_back(GEP);
      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
        // Track pointer through bitcasts.
        Worklist.push_back(BCI);
        Reduction += InlineConstants::InstrCost;
      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
        // SROA can handle a select of alloca iff all uses of the alloca are
        // loads, and dereferenceable. We assume it's dereferenceable since
        // we're told the input is an alloca.
        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
             UI != UE; ++UI) {
          LoadInst *LI = dyn_cast<LoadInst>(*UI);
          if (LI == 0 || !LI->isSimple()) return 0;
        }
        // We don't know whether we'll be deleting the rest of the chain of
        // instructions from the SelectInst on, because we don't know whether
        // the other side of the select is also an alloca or not.
        continue;
      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        default:
          return 0;
        case Intrinsic::memset:
        case Intrinsic::memcpy:
        case Intrinsic::memmove:
        case Intrinsic::lifetime_start:
        case Intrinsic::lifetime_end:
          // SROA can usually chew through these intrinsics.
          Reduction += InlineConstants::InstrCost;
          break;
        }
      } else {
        // If there is some other strange instruction, we're not going to be
        // able to do much if we inline this.
        return 0;
      }
    }
  } while (!Worklist.empty());

  return Reduction;
}


@@ -0,0 +1,44 @@
; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s

declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)

@glbl = external global i32

define void @outer1() {
; CHECK: @outer1
; CHECK-NOT: call void @inner1
  %ptr = alloca i32
  call void @inner1(i32* %ptr)
  ret void
}

define void @inner1(i32 *%ptr) {
  %A = load i32* %ptr
  store i32 0, i32* %ptr
  %C = getelementptr i32* %ptr, i32 0
  %D = getelementptr i32* %ptr, i32 1
  %E = bitcast i32* %ptr to i8*
  %F = select i1 false, i32* %ptr, i32* @glbl
  call void @llvm.lifetime.start(i64 0, i8* %E)
  ret void
}

define void @outer2() {
; CHECK: @outer2
; CHECK: call void @inner2
  %ptr = alloca i32
  call void @inner2(i32* %ptr)
  ret void
}

; %D poisons this call, scalar-repl can't handle that instruction.
define void @inner2(i32 *%ptr) {
  %A = load i32* %ptr
  store i32 0, i32* %ptr
  %C = getelementptr i32* %ptr, i32 0
  %D = getelementptr i32* %ptr, i32 %A
  %E = bitcast i32* %ptr to i8*
  %F = select i1 false, i32* %ptr, i32* @glbl
  call void @llvm.lifetime.start(i64 0, i8* %E)
  ret void
}