Add a small gep optimization I noticed was missing while reading some IL.

llvm-svn: 136585
This commit is contained in:
Rafael Espindola 2011-07-31 04:43:41 +00:00
parent 163e7a73f1
commit a3a44f3fc3
2 changed files with 33 additions and 11 deletions

View File

@ -737,7 +737,15 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
return Ty;
}
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
// If this GEP has only 0 indices, it is the same pointer as
// Src. If Src is not a trivial GEP too, don't combine
// the indices.
if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
!Src.hasOneUse())
return false;
return true;
}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
@ -785,21 +793,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr instructions into a single instruction.
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
// If this GEP has only 0 indices, it is the same pointer as
// Src. If Src is not a trivial GEP too, don't combine
// the indices.
if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
!Src->hasOneUse())
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return 0;
// Note that if our source is a gep chain itself that we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
//
if (GetElementPtrInst *SrcGEP =
dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2)
if (GEPOperator *SrcGEP =
dyn_cast<GEPOperator>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
return 0; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;

View File

@ -472,3 +472,23 @@ entry:
; CHECK: @pr10322_f1
; CHECK: %tmp2 = getelementptr inbounds %pr10322_t* %arrayidx8, i64 0, i32 0
}
; Test that we combine the last two geps in this sequence, before we
; would wait for gep1 and gep2 to be combined and never combine 2 and 3.
%three_gep_t = type {i32}
%three_gep_t2 = type {%three_gep_t}
define void @three_gep_f(%three_gep_t2* %x) {
%gep1 = getelementptr %three_gep_t2* %x, i64 2
call void @three_gep_h(%three_gep_t2* %gep1)
%gep2 = getelementptr %three_gep_t2* %gep1, i64 0, i32 0
%gep3 = getelementptr %three_gep_t* %gep2, i64 0, i32 0
call void @three_gep_g(i32* %gep3)
; CHECK: @three_gep_f
; CHECK: %gep3 = getelementptr %three_gep_t2* %gep1, i64 0, i32 0, i32 0
ret void
}
declare void @three_gep_g(i32*)
declare void @three_gep_h(%three_gep_t2*)