From 8c129d768789439445ed641d13b63e396cedc0a2 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Thu, 16 Jul 2009 17:34:36 +0000
Subject: [PATCH] Fill in some holes in ScalarEvolution's loop iteration
 condition analysis. This allows indvars to emit a simpler loop trip count
 expression.

llvm-svn: 76085
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 29 +++++++++++++-
 .../Transforms/IndVarSimplify/lftr-promote.ll | 38 +++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/lftr-promote.ll

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7c68f8920e09..dcd6558d34b2 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -960,6 +960,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
         return getAddRecExpr(getSignExtendExpr(Start, Ty),
                              getSignExtendExpr(Step, Ty),
                              L);
+
+      // Similar to above, only this time treat the step value as unsigned.
+      // This covers loops that count up with an unsigned step.
+      const SCEV *UMul =
+        getMulExpr(CastedMaxBECount,
+                   getTruncateOrZeroExtend(Step, Start->getType()));
+      Add = getAddExpr(Start, UMul);
+      OperandExtendedAdd =
+        getAddExpr(getZeroExtendExpr(Start, WideTy),
+                   getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+                              getZeroExtendExpr(Step, WideTy)));
+      if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+        // Return the expression with the addrec on the outside.
+        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+                             getZeroExtendExpr(Step, Ty),
+                             L);
     }
 
   // If the backedge is guarded by a comparison with the pre-inc value
@@ -4248,7 +4264,7 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
 
   switch (Pred) {
   default:
-    assert(0 && "Unexpected ICmpInst::Predicate value!");
+    llvm_unreachable("Unexpected ICmpInst::Predicate value!");
     break;
   case ICmpInst::ICMP_SGT:
     Pred = ICmpInst::ICMP_SLT;
@@ -4556,23 +4572,32 @@ ScalarEvolution::isNecessaryCondOperands(ICmpInst::Predicate Pred,
                                          const SCEV *FoundLHS,
                                          const SCEV *FoundRHS) {
   switch (Pred) {
-  default: break;
+  default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_NE:
+    if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
+      return true;
+    break;
   case ICmpInst::ICMP_SLT:
+  case ICmpInst::ICMP_SLE:
     if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
         isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS))
       return true;
     break;
   case ICmpInst::ICMP_SGT:
+  case ICmpInst::ICMP_SGE:
     if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
         isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS))
       return true;
     break;
   case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_ULE:
     if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
         isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS))
       return true;
     break;
   case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_UGE:
     if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
         isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS))
       return true;
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll b/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll
new file mode 100644
index 000000000000..b2cb770dd612
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep add | count 1
+
+; Indvars should be able to compute the exit value of this loop
+; without any additional arithmetic. The only add needed should
+; be the canonical IV increment.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(double* %p, i32 %n) nounwind {
+entry:
+  %0 = icmp sgt i32 %n, 0                       ; <i1> [#uses=1]
+  br i1 %0, label %bb.nph, label %return
+
+bb.nph:                                         ; preds = %entry
+  br label %bb2
+
+bb2:                                            ; preds = %bb3, %bb.nph
+  %i.01 = phi i32 [ %7, %bb3 ], [ 0, %bb.nph ]  ; <i32> [#uses=3]
+  %1 = sext i32 %i.01 to i64                    ; <i64> [#uses=1]
+  %2 = getelementptr double* %p, i64 %1         ; <double*> [#uses=1]
+  %3 = load double* %2, align 8                 ; <double> [#uses=1]
+  %4 = fmul double %3, 1.100000e+00             ; <double> [#uses=1]
+  %5 = sext i32 %i.01 to i64                    ; <i64> [#uses=1]
+  %6 = getelementptr double* %p, i64 %5         ; <double*> [#uses=1]
+  store double %4, double* %6, align 8
+  %7 = add i32 %i.01, 1                         ; <i32> [#uses=2]
+  br label %bb3
+
+bb3:                                            ; preds = %bb2
+  %8 = icmp slt i32 %7, %n                      ; <i1> [#uses=1]
+  br i1 %8, label %bb2, label %bb3.return_crit_edge
+
+bb3.return_crit_edge:                           ; preds = %bb3
+  br label %return
+
+return:                                         ; preds = %bb3.return_crit_edge, %entry
+  ret void
+}
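
Note (added for this write-up, not part of the patch): the new regression test
corresponds roughly to the following hand-written C++ loop. The function name
and types are taken from the IR above, but this source is a reconstruction for
illustration, not the code the test was generated from.

// Approximate source for lftr-promote.ll. The i32 induction variable is
// sign-extended to i64 when it is used to index the double array, which is
// the sign-extension pattern ScalarEvolution has to reason about here.
void foo(double *p, int n) {
  for (int i = 0; i < n; ++i)
    p[i] *= 1.1;
}

After running -indvars, the only add that should remain is the canonical
induction variable increment, which is exactly what the test's
"grep add | count 1" RUN line checks.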