From 203eaaf5badb8f697d37c76aaff414846bf626c3 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 22 Jun 2018 22:58:55 +0000 Subject: [PATCH] [LoopReroll] Rewrite induction variable rewriting. This gets rid of a bunch of weird special cases; instead, just use SCEV rewriting for everything. In addition to being simpler, this fixes a bug where we would use the wrong stride in certain edge cases. The one bit I'm not quite sure about is the trip count handling, specifically the FIXME about overflow. In general, I think we need to widen the exit condition, but that's probably not profitable if the new type isn't legal, so we probably need a check somewhere. That said, I don't think I'm making the existing problem any worse. As a followup to this, a bunch of IV-related code in root-finding could be cleaned up; with SCEV-based rewriting, there isn't any reason to assume a loop will have exactly one or two PHI nodes. Differential Revision: https://reviews.llvm.org/D45191 llvm-svn: 335400 --- llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 234 +++++------------- llvm/test/Transforms/LoopReroll/basic.ll | 87 +++++-- .../Transforms/LoopReroll/complex_reroll.ll | 41 +-- .../Transforms/LoopReroll/indvar_with_ext.ll | 18 +- .../test/Transforms/LoopReroll/nonconst_lb.ll | 34 ++- llvm/test/Transforms/LoopReroll/ptrindvar.ll | 4 +- llvm/test/Transforms/LoopReroll/reduction.ll | 12 +- 7 files changed, 178 insertions(+), 252 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index 74672f13334a..9a99e5925572 100644 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -69,10 +69,6 @@ using namespace llvm; STATISTIC(NumRerolledLoops, "Number of rerolled loops"); -static cl::opt -MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, - cl::desc("The maximum increment for loop rerolling")); - static cl::opt 
NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, @@ -398,8 +394,8 @@ namespace { /// Stage 3: Assuming validate() returned true, perform the /// replacement. - /// @param IterCount The maximum iteration count of L. - void replace(const SCEV *IterCount); + /// @param BackedgeTakenCount The backedge-taken count of L. + void replace(const SCEV *BackedgeTakenCount); protected: using UsesTy = MapVector; @@ -429,8 +425,7 @@ namespace { bool instrDependsOn(Instruction *I, UsesTy::iterator Start, UsesTy::iterator End); - void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount); - void updateNonLoopCtrlIncr(); + void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr); LoopReroll *Parent; @@ -483,8 +478,8 @@ namespace { void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); void collectPossibleReductions(Loop *L, ReductionTracker &Reductions); - bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, - ReductionTracker &Reductions); + bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, + const SCEV *BackedgeTakenCount, ReductionTracker &Reductions); }; } // end anonymous namespace @@ -511,48 +506,6 @@ static bool hasUsesOutsideLoop(Instruction *I, Loop *L) { return false; } -static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE, - const SCEV *SCEVExpr, - Instruction &IV) { - const SCEVMulExpr *MulSCEV = dyn_cast(SCEVExpr); - - // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)), - // Return c1. 
- if (!MulSCEV && IV.getType()->isPointerTy()) - if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) { - const PointerType *PTy = cast<PointerType>(IV.getType()); - Type *ElTy = PTy->getElementType(); - const SCEV *SizeOfExpr = - SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy); - if (IncSCEV->getValue()->getValue().isNegative()) { - const SCEV *NewSCEV = - SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr); - return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV)); - } else { - return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr)); - } - } - - if (!MulSCEV) - return nullptr; - - // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant, - // Return c. - const SCEVConstant *CIncSCEV = nullptr; - for (const SCEV *Operand : MulSCEV->operands()) { - if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) { - CIncSCEV = Constant; - } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) { - Type *AllocTy; - if (!Unknown->isSizeOf(AllocTy)) - break; - } else { - return nullptr; - } - } - return CIncSCEV; -} - // Check if an IV is only used to control the loop. There are two cases: // 1. 
It only has one use which is loop increment, and the increment is only // used by comparison and the PHI (could has sext with nsw in between), and the @@ -633,16 +586,8 @@ void LoopReroll::collectPossibleIVs(Loop *L, continue; if (!PHISCEV->isAffine()) continue; - const SCEVConstant *IncSCEV = nullptr; - if (I->getType()->isPointerTy()) - IncSCEV = - getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I); - else - IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); + auto IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); if (IncSCEV) { - const APInt &AInt = IncSCEV->getValue()->getValue().abs(); - if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc)) - continue; IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV << "\n"); @@ -1463,8 +1408,20 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { return true; } -void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { +void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) { BasicBlock *Header = L->getHeader(); + + // Compute the start and increment for each BaseInst before we start erasing + // instructions. + SmallVector StartExprs; + SmallVector IncrExprs; + for (auto &DRS : RootSets) { + const SCEVAddRecExpr *IVSCEV = + cast(SE->getSCEV(DRS.BaseInst)); + StartExprs.push_back(IVSCEV->getStart()); + IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV)); + } + // Remove instructions associated with non-base iterations. for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend(); J != JE;) { @@ -1478,74 +1435,47 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { ++J; } - bool HasTwoIVs = LoopControlIV && LoopControlIV != IV; + // Rewrite each BaseInst using SCEV. + for (size_t i = 0, e = RootSets.size(); i != e; ++i) + // Insert the new induction variable. 
+ replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]); - if (HasTwoIVs) { - updateNonLoopCtrlIncr(); - replaceIV(LoopControlIV, LoopControlIV, IterCount); - } else - // We need to create a new induction variable for each different BaseInst. - for (auto &DRS : RootSets) - // Insert the new induction variable. - replaceIV(DRS.BaseInst, IV, IterCount); + { // Limit the lifetime of SCEVExpander. + BranchInst *BI = cast<BranchInst>(Header->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "reroll"); + auto Zero = SE->getZero(BackedgeTakenCount->getType()); + auto One = SE->getOne(BackedgeTakenCount->getType()); + auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap); + Value *NewIV = + Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(), + Header->getFirstNonPHIOrDbg()); + // FIXME: This arithmetic can overflow. + auto TripCount = SE->getAddExpr(BackedgeTakenCount, One); + auto ScaledTripCount = SE->getMulExpr( + TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale)); + auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One); + Value *TakenCount = + Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(), + Header->getFirstNonPHIOrDbg()); + Value *Cond = + new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond"); + BI->setCondition(Cond); + + if (BI->getSuccessor(1) != Header) + BI->swapSuccessors(); + } SimplifyInstructionsInBlock(Header, TLI); DeleteDeadPHIs(Header, TLI); } -// For non-loop-control IVs, we only need to update the last increment -// with right amount, then we are done. 
-void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() { - const SCEV *NewInc = nullptr; - for (auto *LoopInc : LoopIncs) { - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc); - const SCEVConstant *COp = nullptr; - if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) { - COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1))); - } else { - COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0))); - if (!COp) - COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1))); - } - - assert(COp && "Didn't find constant operand of LoopInc!\n"); - - const APInt &AInt = COp->getValue()->getValue(); - const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale); - if (AInt.isNegative()) { - NewInc = SE->getNegativeSCEV(COp); - NewInc = SE->getUDivExpr(NewInc, ScaleSCEV); - NewInc = SE->getNegativeSCEV(NewInc); - } else - NewInc = SE->getUDivExpr(COp, ScaleSCEV); - - LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue()); - } -} - -void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst, - Instruction *InstIV, - const SCEV *IterCount) { +void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS, + const SCEV *Start, + const SCEV *IncrExpr) { BasicBlock *Header = L->getHeader(); - int64_t Inc = IVToIncMap[InstIV]; - bool NeedNewIV = InstIV == LoopControlIV; - bool Negative = !NeedNewIV && Inc < 0; + Instruction *Inst = DRS.BaseInst; - const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst)); - const SCEV *Start = RealIVSCEV->getStart(); - - if (NeedNewIV) - Start = SE->getConstant(Start->getType(), 0); - - const SCEV *SizeOfExpr = nullptr; - const SCEV *IncrExpr = - SE->getConstant(RealIVSCEV->getType(), Negative ? 
-1 : 1); - if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) { - Type *ElTy = PTy->getElementType(); - SizeOfExpr = - SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy); - IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr); - } const SCEV *NewIVSCEV = SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap); @@ -1558,54 +1488,6 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst, for (auto &KV : Uses) if (KV.second.find_first() == 0) KV.first->replaceUsesOfWith(Inst, NewIV); - - if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) { - // FIXME: Why do we need this check? - if (Uses[BI].find_first() == IL_All) { - const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); - - if (NeedNewIV) - ICSCEV = SE->getMulExpr(IterCount, - SE->getConstant(IterCount->getType(), Scale)); - - // Iteration count SCEV minus or plus 1 - const SCEV *MinusPlus1SCEV = - SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1); - if (Inst->getType()->isPointerTy()) { - assert(SizeOfExpr && "SizeOfExpr is not initialized"); - MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr); - } - - const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV); - // Iteration count minus 1 - Instruction *InsertPtr = nullptr; - if (isa<SCEVConstant>(ICMinusPlus1SCEV)) { - InsertPtr = BI; - } else { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - InsertPtr = Preheader->getTerminator(); - } - - if (!isa<PointerType>(NewIV->getType()) && NeedNewIV && - (SE->getTypeSizeInBits(NewIV->getType()) < - SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) { - IRBuilder<> Builder(BI); - Builder.SetCurrentDebugLocation(BI->getDebugLoc()); - NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType()); - } - Value *ICMinusPlus1 = Expander.expandCodeFor( - ICMinusPlus1SCEV, NewIV->getType(), InsertPtr); - - Value *Cond = - new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond"); - 
BI->setCondition(Cond); - - if (BI->getSuccessor(1) != Header) - BI->swapSuccessors(); - } - } } } @@ -1722,7 +1604,7 @@ void LoopReroll::ReductionTracker::replaceSelected() { // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions // have been validated), then we reroll the loop. bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, - const SCEV *IterCount, + const SCEV *BackedgeTakenCount, ReductionTracker &Reductions) { DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, IVToIncMap, LoopControlIV); @@ -1740,7 +1622,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, // making changes! Reductions.replaceSelected(); - DAGRoots.replace(IterCount); + DAGRoots.replace(BackedgeTakenCount); ++NumRerolledLoops; return true; @@ -1769,10 +1651,10 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; - const SCEV *LIBETC = SE->getBackedgeTakenCount(L); - const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType())); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n"); - LLVM_DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); + LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount + << "\n"); // First, we need to find the induction variable with respect to which we can // reroll (there may be several possible options). @@ -1793,7 +1675,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { // For each possible IV, collect the associated possible set of 'root' nodes // (i+1, i+2, etc.). 
for (Instruction *PossibleIV : PossibleIVs) - if (reroll(PossibleIV, L, Header, IterCount, Reductions)) { + if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) { Changed = true; break; } diff --git a/llvm/test/Transforms/LoopReroll/basic.ll b/llvm/test/Transforms/LoopReroll/basic.ll index 096b17b303c5..6e2f2fcabf13 100644 --- a/llvm/test/Transforms/LoopReroll/basic.ll +++ b/llvm/test/Transforms/LoopReroll/basic.ll @@ -79,11 +79,12 @@ for.body: ; preds = %entry, %for.body ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %call = tail call i32 @foo(i32 0) #1 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 1499 +; CHECK: %exitcond = icmp eq i32 %0, 1499 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -205,15 +206,16 @@ for.body: ; preds = %entry, %for.body ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar -; CHECK: %0 = load float, float* %arrayidx, align 4 -; CHECK: %mul = fmul float %0, %alpha +; CHECK: %1 = load float, float* %arrayidx, align 4 +; CHECK: %mul = fmul float %1, %alpha ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar -; CHECK: %1 = load float, float* %arrayidx2, align 4 -; CHECK: %add = fadd float %1, %mul +; CHECK: %2 = load float, float* %arrayidx2, align 4 +; CHECK: %add = fadd float %2, %mul ; CHECK: store float %add, float* %arrayidx2, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 3199 +; CHECK: %exitcond = icmp eq i32 %0, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -302,18 +304,19 @@ for.body: ; preds = 
%entry, %for.body ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar -; CHECK: %0 = load i32, i32* %arrayidx, align 4 -; CHECK: %idxprom1 = sext i32 %0 to i64 +; CHECK: %1 = load i32, i32* %arrayidx, align 4 +; CHECK: %idxprom1 = sext i32 %1 to i64 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1 -; CHECK: %1 = load float, float* %arrayidx2, align 4 -; CHECK: %mul = fmul float %1, %alpha +; CHECK: %2 = load float, float* %arrayidx2, align 4 +; CHECK: %mul = fmul float %2, %alpha ; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar -; CHECK: %2 = load float, float* %arrayidx4, align 4 -; CHECK: %add = fadd float %2, %mul +; CHECK: %3 = load float, float* %arrayidx4, align 4 +; CHECK: %add = fadd float %3, %mul ; CHECK: store float %add, float* %arrayidx4, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 3199 +; CHECK: %exitcond = icmp eq i32 %0, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -374,8 +377,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx6, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond2 = icmp eq i64 %0, 1505 -; CHECK: br i1 %exitcond2, label %for.end, label %for.body +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -434,8 +437,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx6, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond2 = icmp eq i64 %indvars.iv, 1499 -; CHECK: br i1 %exitcond2, label %for.end, 
label %for.body +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -481,7 +484,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond1 = icmp eq i64 %0, 1502 +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body @@ -599,8 +602,8 @@ for.body: ; preds = %for.body, %entry ; CHECK-NEXT: %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv ; CHECK-NEXT: store i32 %call, i32* %scevgep, align 4 ; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK-NEXT: %exitcond2 = icmp eq i32* %scevgep, %scevgep1 -; CHECK-NEXT: br i1 %exitcond2, label %for.end, label %for.body +; CHECK-NEXT: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK-NEXT: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -738,6 +741,50 @@ for.end: ret void } +define void @pointer_bitcast_baseinst(i16* %arg, i8* %arg1, i64 %arg2) { +; CHECK-LABEL: @pointer_bitcast_baseinst( +; CHECK: bb3: +; CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ] +; CHECK-NEXT: %4 = shl i64 %indvar, 3 +; CHECK-NEXT: %5 = add i64 %4, 1 +; CHECK-NEXT: %tmp5 = shl nuw i64 %5, 1 +; CHECK-NEXT: %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5 +; CHECK-NEXT: %tmp7 = bitcast i8* %tmp6 to <8 x i16>* +; CHECK-NEXT: %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2 +; CHECK-NEXT: %tmp13 = getelementptr i16, i16* %arg, i64 %5 +; CHECK-NEXT: %tmp14 = bitcast i16* %tmp13 to <8 x i16>* +; CHECK-NEXT: store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2 +; CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +; CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +; CHECK-NEXT: br i1 %exitcond, label %bb19, label %bb3 +bb: + 
br label %bb3 + +bb3: ; preds = %bb3, %bb + %tmp = phi i64 [ 1, %bb ], [ %tmp17, %bb3 ] + %tmp4 = add nuw i64 %tmp, 8 + %tmp5 = shl nuw i64 %tmp, 1 + %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5 + %tmp7 = bitcast i8* %tmp6 to <8 x i16>* + %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2 + %tmp9 = shl i64 %tmp4, 1 + %tmp10 = getelementptr i8, i8* %arg1, i64 %tmp9 + %tmp11 = bitcast i8* %tmp10 to <8 x i16>* + %tmp12 = load <8 x i16>, <8 x i16>* %tmp11, align 2 + %tmp13 = getelementptr i16, i16* %arg, i64 %tmp + %tmp14 = bitcast i16* %tmp13 to <8 x i16>* + store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2 + %tmp15 = getelementptr i16, i16* %arg, i64 %tmp4 + %tmp16 = bitcast i16* %tmp15 to <8 x i16>* + store <8 x i16> %tmp12, <8 x i16>* %tmp16, align 2 + %tmp17 = add nuw nsw i64 %tmp, 16 + %tmp18 = icmp eq i64 %tmp17, %arg2 + br i1 %tmp18, label %bb19, label %bb3 + +bb19: ; preds = %bb3 + ret void +} + attributes #0 = { nounwind uwtable } attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/LoopReroll/complex_reroll.ll b/llvm/test/Transforms/LoopReroll/complex_reroll.ll index 3a2c72215781..7dea5b7b3f86 100644 --- a/llvm/test/Transforms/LoopReroll/complex_reroll.ll +++ b/llvm/test/Transforms/LoopReroll/complex_reroll.ll @@ -10,15 +10,15 @@ entry: while.body: ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ] -;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %entry ] ;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ] -;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1 +;CHECK-NEXT: %0 = trunc i64 %indvar to i32 +;CHECK-NEXT: %scevgep = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 %indvar +;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %scevgep, align 1 ;CHECK-NEXT: %conv = zext i8 [[T2]] to i64 ;CHECK-NEXT: %add = add 
i64 %conv, %sum44.020 -;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %0, 15 ;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ] @@ -67,14 +67,14 @@ for.cond.cleanup: for.body: ;CHECK-LABEL: for.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] ;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ] -;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ] -;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011 -;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: %4 = trunc i64 %indvar to i32 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %a, i64 %indvar +;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %5, %S.addr.011 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 ;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ] @@ -101,14 +101,15 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] -;CHECK-NEXT: %buf.addr.011 = phi 
i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] -;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.012 -;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: %4 = trunc i64 %indvar to i32 +;CHECK-NEXT: %5 = mul i64 %indvar, -1 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5 +;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %6, %S.012 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ] diff --git a/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll b/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll index 7aae61433aed..59d5ea5ff87a 100644 --- a/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll +++ b/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll @@ -14,18 +14,16 @@ while.body.preheader: while.body: ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ] -;CHECK-NEXT: [[T1:%[0-9]+]] = trunc i64 %indvars.iv.i423 to i32 -;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423 +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] +;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvar ;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4 -;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423 +;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvar ;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4 ;CHECK-NEXT: %mul65.i = fmul fast float %t2, 
%t22 ;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1 ;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4 -;CHECK-NEXT: %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 1 -;CHECK-NEXT: [[T2:%[0-9]+]] = sext i32 [[T1]] to i64 -;CHECK-NEXT: %exitcond = icmp eq i64 [[T2]], %{{[0-9]+}} +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}} ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ] @@ -69,7 +67,7 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4 @@ -111,7 +109,7 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK: %add12 = add i8 %i.022, 2 ;CHECK-NEXT: %conv = sext i8 %add12 to i32 ;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n @@ -153,7 +151,7 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4 diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll index d3c9385a33b1..d52cc1211b86 100644 --- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll +++ b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll @@ -53,20 +53,19 @@ for.end: ; preds = %for.body, %entry ; CHECK: %1 = sub i32 %0, %m ; CHECK: %2 = lshr i32 %1, 2 ; CHECK: %3 = shl i32 %2, 
2 -; CHECK: %4 = add i32 %m, %3 -; CHECK: %5 = add i32 %4, 3 +; CHECK: %4 = add i32 %3, 3 ; CHECK: br label %for.body ; CHECK: for.body: ; preds = %for.body, %for.body.preheader ; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %6 = add i32 %m, %indvar -; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %6 -; CHECK: %7 = load i32, i32* %arrayidx, align 4 -; CHECK: %mul = shl nsw i32 %7, 2 -; CHECK: %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %6 +; CHECK: %5 = add i32 %m, %indvar +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %5 +; CHECK: %6 = load i32, i32* %arrayidx, align 4 +; CHECK: %mul = shl nsw i32 %6, 2 +; CHECK: %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %5 ; CHECK: store i32 %mul, i32* %arrayidx2, align 4 ; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: %exitcond = icmp eq i32 %indvar, %4 ; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body ;void daxpy_ur(int n,float da,float *dx,float *dy) @@ -133,20 +132,19 @@ for.end: ; preds = %for.body, %entry ; CHECK: %1 = sub i32 %0, %rem ; CHECK: %2 = lshr i32 %1, 2 ; CHECK: %3 = shl i32 %2, 2 -; CHECK: %4 = add i32 %rem, %3 -; CHECK: %5 = add i32 %4, 3 +; CHECK: %4 = add i32 %3, 3 ; CHECK: br label %for.body ; CHECK: for.body: ; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %6 = add i32 %rem, %indvar -; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %6 -; CHECK: %7 = load float, float* %arrayidx, align 4 -; CHECK: %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %6 -; CHECK: %8 = load float, float* %arrayidx1, align 4 -; CHECK: %mul = fmul float %8, %da -; CHECK: %add = fadd float %7, %mul +; CHECK: %5 = add i32 %rem, %indvar +; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %5 +; CHECK: %6 = load float, float* %arrayidx, align 4 +; CHECK: %arrayidx1 = getelementptr inbounds 
float, float* %dx, i32 %5 +; CHECK: %7 = load float, float* %arrayidx1, align 4 +; CHECK: %mul = fmul float %7, %da +; CHECK: %add = fadd float %6, %mul ; CHECK: store float %add, float* %arrayidx, align 4 ; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: %exitcond = icmp eq i32 %indvar, %4 ; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopReroll/ptrindvar.ll b/llvm/test/Transforms/LoopReroll/ptrindvar.ll index 05852bdca9ef..0a319ad35252 100644 --- a/llvm/test/Transforms/LoopReroll/ptrindvar.ll +++ b/llvm/test/Transforms/LoopReroll/ptrindvar.ll @@ -17,7 +17,7 @@ while.body: ;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4 ;CHECK-NEXT: %add = add nsw i32 %4, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] @@ -57,7 +57,7 @@ while.body: ;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 ;CHECK-NEXT: %add = add nsw i32 %5, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] diff --git a/llvm/test/Transforms/LoopReroll/reduction.ll b/llvm/test/Transforms/LoopReroll/reduction.ll index 191518514b8d..b1e92478f264 100644 --- a/llvm/test/Transforms/LoopReroll/reduction.ll +++ b/llvm/test/Transforms/LoopReroll/reduction.ll @@ -35,10 +35,10 @@ for.body: ; preds = %entry, %for.body ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ] ; CHECK: %arrayidx = getelementptr inbounds i32, i32* 
%x, i64 %indvar -; CHECK: %0 = load i32, i32* %arrayidx, align 4 -; CHECK: %add = add nsw i32 %0, %r.029 +; CHECK: %1 = load i32, i32* %arrayidx, align 4 +; CHECK: %add = add nsw i32 %1, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 399 +; CHECK: %exitcond = icmp eq i32 %0, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -80,10 +80,10 @@ for.body: ; preds = %entry, %for.body ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] ; CHECK: %arrayidx = getelementptr inbounds float, float* %x, i64 %indvar -; CHECK: %0 = load float, float* %arrayidx, align 4 -; CHECK: %add = fadd float %0, %r.029 +; CHECK: %1 = load float, float* %arrayidx, align 4 +; CHECK: %add = fadd float %1, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 399 +; CHECK: %exitcond = icmp eq i32 %0, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret