scev: Better sign-extend removal. Normalize postincrement recurrences

so that their sign-extended forms are congruent when no overflow occurs.

llvm-svn: 132360
Andrew Trick 2011-05-31 21:17:47 +00:00
parent 8f0cd597f1
commit 812276eed4
2 changed files with 114 additions and 48 deletions
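
To see the congruence the message describes on concrete values, here is a standalone sketch (not part of the patch; i8 stands in for the narrow IV type, i32 for the widened one, and the function names are hypothetical):

  #include <cassert>
  #include <cstdint>

  // Evaluate the narrow (i8) recurrence {start,+,step} at iteration i,
  // then sign-extend the result to i32.
  int32_t sextOfRec(int8_t start, int8_t step, int i) {
    int8_t v = start;
    for (int k = 0; k < i; ++k)
      v = (int8_t)(v + step); // narrow addition; would wrap on overflow
    return (int32_t)v;        // sext i8 -> i32
  }

  int main() {
    int8_t prestart = 10, step = 3;
    // While no iteration overflows i8, the post-increment recurrence
    // {prestart+step,+,step} and step + {prestart,+,step} sign-extend to
    // the same wide value: sext(PostIncAR) == Step + sext(PreIncAR).
    for (int i = 0; i < 30; ++i)
      assert(sextOfRec((int8_t)(prestart + step), step, i) ==
             (int32_t)step + sextOfRec(prestart, step, i));
    return 0;
  }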


@@ -1035,6 +1035,93 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+                                           ICmpInst::Predicate *Pred,
+                                           ScalarEvolution *SE) {
+  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+  if (SE->isKnownPositive(Step)) {
+    *Pred = ICmpInst::ICMP_SLT;
+    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMax());
+  }
+  if (SE->isKnownNegative(Step)) {
+    *Pred = ICmpInst::ICMP_SGT;
+    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMin());
+  }
+  return 0;
+}
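
As a numeric check on getOverflowLimitForStep for a positive step (a sketch assuming BitWidth = 8 and getSignedRange(Step).getSignedMax() == 3; plain unsigned arithmetic stands in for APInt's wrapping subtraction):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // APInt::getSignedMinValue(8) - 3, computed modulo 2^8:
    uint8_t limit = (uint8_t)(0x80 - 3); // 0x7D, i.e. +125 as a signed i8
    // With Pred = ICMP_SLT: any v with v < 125 satisfies v + 3 <= 127,
    // so one more increment cannot signed-overflow an i8.
    printf("limit = %d\n", (int8_t)limit); // prints 125
    return 0;
  }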
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign-extended AddRec as
+// follows: {sext(Step + Start),+,Step} => {(Step + sext(Start)),+,Step}. As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const Loop *L = AR->getLoop();
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Check for a simple-looking step prior to loop entry.
+  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+  if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+    return 0;
+
+  // This is a postinc AR. Check for overflow on the preinc recurrence using the
+  // same three conditions that getSignExtendExpr checks.
+
+  // 1. NSW flags on the step increment.
+  const SCEV *PreStart = SA->getOperand(1);
+  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) {
+    return PreStart;
+  }
+
+  // 2. Direct overflow check on the step operation's expression.
+  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+  const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+  const SCEV *OperandExtendedStart =
+      SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+                     SE->getSignExtendExpr(Step, WideTy));
+  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+    // Cache knowledge of PreAR NSW.
+    if (PreAR)
+      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+    // FIXME: this optimization needs a unit test
+    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+    return PreStart;
+  }
+
+  // 3. Loop precondition. getOverflowLimitForStep may fail to find a limit,
+  // in which case there is nothing to guard against.
+  ICmpInst::Predicate Pred;
+  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+  if (OverflowLimit &&
+      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+    return PreStart;
+  }
+
+  return 0;
+}
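
Condition 2 exploits a basic property of sign extension: the narrow add PreStart + Step overflows exactly when extending the operands first and adding in the doubled width disagrees with extending the narrow sum. A minimal sketch (i8 narrow, i16 as the doubled width; function name hypothetical):

  #include <cassert>
  #include <cstdint>

  // Mirrors the OperandExtendedStart check: true iff prestart + step
  // signed-overflows i8.
  bool stepOverflowsI8(int8_t prestart, int8_t step) {
    int16_t wide = (int16_t)prestart + (int16_t)step;    // exact sum
    int16_t narrow = (int16_t)(int8_t)(prestart + step); // wraps first, then
                                                         // sign-extends (wrap is
                                                         // well-defined in C++20)
    return wide != narrow;
  }

  int main() {
    assert(!stepOverflowsI8(10, 3));  // 13 fits in i8
    assert(stepOverflowsI8(100, 50)); // 150 wraps to -106
    return 0;
  }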
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+                                            const Type *Ty,
+                                            ScalarEvolution *SE) {
+  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+  if (!PreStart)
+    return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+                        SE->getSignExtendExpr(PreStart, Ty));
+}
+
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
const Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
-return getAddRecExpr(getSignExtendExpr(Start, Ty),
+return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, SCEV::FlagNSW);
@@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
-return getAddRecExpr(getSignExtendExpr(Start, Ty),
+return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
@@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
-return getAddRecExpr(getSignExtendExpr(Start, Ty),
+return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getZeroExtendExpr(Step, Ty),
L, AR->getNoWrapFlags());
}
@@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// the addrec is safe. Also, if the entry is guarded by a comparison
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
-      if (isKnownPositive(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMax());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
-      } else if (isKnownNegative(Step)) {
-        const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
-                                    getSignedRange(Step).getSignedMin());
-        if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
-            (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
-             isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
-                                         AR->getPostIncExpr(*this), N))) {
-          // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-          // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
-        }
+      ICmpInst::Predicate Pred;
+      const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+      if (OverflowLimit &&
+          (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+           (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+            isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+                                        OverflowLimit)))) {
+        // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+                             getSignExtendExpr(Step, Ty),
+                             L, AR->getNoWrapFlags());
+      }
}
}
}
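
The loop shape this entry/backedge guard reasoning covers, sketched in C++ (hypothetical values; the entry test guards the start value and the backedge test guards the post-increment value against the overflow limit):

  #include <cstdint>
  #include <cstdio>

  int main() {
    const int8_t Limit = 120; // below the overflow limit of 125 for step 3
    // Entry guard: start < Limit. Backedge guard: post-inc value < Limit.
    // Every in-loop value of iv therefore stays below 125, so iv + 3 can
    // never exceed 127 and the recurrence is provably NSW.
    for (int8_t iv = 0; iv < Limit; iv = (int8_t)(iv + 3))
      printf("%d\n", (int)iv); // sign-extending iv here is safe to widen
    return 0;
  }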


@@ -2,9 +2,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-; Test reusing the same IV with constant start for preinc and postinc values
-; with and without NSW.
-; IV rewrite only removes one sext. WidenIVs should remove all three.
+; IV with constant start, preinc and postinc sign extensions, with and without NSW.
+; IV rewrite only removes one sext. WidenIVs removes all three.
define void @postincConstIV(i8* %base, i32 %limit) nounwind {
entry:
br label %loop
@@ -33,21 +32,19 @@ return:
ret void
}
-; Test reusing the same IV with nonconstant start for preinc and postinc values
+; IV with nonconstant start, preinc and postinc sign extensions,
; with and without NSW.
-; As with constant IV start, WidenIVs should remove all three.
-;
-; FIXME: WidenIVs should remove %postofs just like %postofsnsw
+; As with postincConstIV, WidenIVs removes all three sexts.
define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind {
entry:
-br label %loop
+%precond = icmp sgt i32 %limit, %init
+br i1 %precond, label %loop, label %return
; CHECK: loop:
; CHECK: sext
; CHECK-NOT: sext
; CHECK: exit:
loop:
%iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
-%ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
+%ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
%preofs = sext i32 %iv to i64
%preadr = getelementptr i8* %base, i64 %preofs
store i8 0, i8* %preadr
@@ -59,7 +56,7 @@ loop:
%postofsnsw = sext i32 %postivnsw to i64
%postadrnsw = getelementptr i8* %base, i64 %postofsnsw
store i8 0, i8* %postadrnsw
-%cond = icmp sgt i32 %limit, %iv
+%cond = icmp sgt i32 %limit, %postiv
br i1 %cond, label %loop, label %exit
exit:
br label %return
@@ -103,15 +100,13 @@ innerpreheader:
; CHECK: innerloop:
;
; Eliminate %ofs2 after widening inneriv.
+; Eliminate %ofs3 after normalizing sext(innerpostiv).
; CHECK-NOT: sext
; CHECK: getelementptr
;
-; FIXME: We should not increase the number of IVs in this loop.
-; sext elimination plus LFTR results in 3 final IVs.
-;
-; FIXME: eliminate %ofs3 based the loop pre/post conditions
-; even though innerpostiv is not NSW, thus sign extending innerpostiv
-; does not yield the same expression as incrementing the widened inneriv.
+; FIXME: We should check that indvars does not increase the number of
+; IVs in this loop. sext elimination plus LFTR currently results in 2 final
+; IVs. Waiting to remove LFTR.
innerloop:
%inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
%innerpostiv = add i32 %inneriv, 1