[LoopVectorize] Handling induction variable with non-constant step.

Allow vectorization when the step is a loop-invariant variable.
This is the loop example that is getting vectorized after the patch:

 int int_inc;
 int bar(int init, int *restrict A, int N) {

  int x = init;
  for (int i=0;i<N;i++){
    A[i] = x;
    x += int_inc;
  }
  return x;
 }

"x" is an induction variable with *loop-invariant* step.
But it is not a primary induction. Primary induction variable with non-constant step is not handled yet.

Differential Revision: http://reviews.llvm.org/D19258

llvm-svn: 269023
This commit is contained in:
Elena Demikhovsky 2016-05-10 07:33:35 +00:00
parent 7360d8a9cc
commit c434d091c5
4 changed files with 250 additions and 54 deletions

View File

@ -269,7 +269,7 @@ public:
public:
/// Default constructor - creates an invalid induction.
InductionDescriptor()
: StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
: StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {}
/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
@ -283,11 +283,13 @@ public:
/// For pointer induction, returns StartValue[Index * StepValue].
/// FIXME: The newly created binary instructions should contain nsw/nuw
/// flags, which can be found from the original scalar operations.
Value *transform(IRBuilder<> &B, Value *Index) const;
Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
const DataLayout& DL) const;
Value *getStartValue() const { return StartValue; }
InductionKind getKind() const { return IK; }
ConstantInt *getStepValue() const { return StepValue; }
const SCEV *getStep() const { return Step; }
ConstantInt *getConstIntStepValue() const;
/// Returns true if \p Phi is an induction. If \p Phi is an induction,
/// the induction descriptor \p D will contain the data describing this
@ -308,14 +310,14 @@ public:
private:
/// Private constructor - used by \c isInductionPHI.
InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step);
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step);
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
/// Step value.
ConstantInt *StepValue;
const SCEV *Step;
};
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
@ -653,45 +654,77 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
ConstantInt *Step)
: StartValue(Start), IK(K), StepValue(Step) {
const SCEV *Step)
: StartValue(Start), IK(K), Step(Step) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
// itself should not be null.
assert(StartValue && "StartValue is null");
assert(StepValue && !StepValue->isZero() && "StepValue is zero");
assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
"StartValue is not a pointer for pointer induction");
assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
"StartValue is not an integer for integer induction");
assert(StepValue->getType()->isIntegerTy() &&
"StepValue is not an integer");
// Check the Step Value. It should be non-zero integer value.
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
"Step value is zero");
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
"Step value should be constant for pointer induction");
assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
}
int InductionDescriptor::getConsecutiveDirection() const {
if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
return StepValue->getSExtValue();
ConstantInt *ConstStep = getConstIntStepValue();
if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
return ConstStep->getSExtValue();
return 0;
}
Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const {
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
if (isa<SCEVConstant>(Step))
return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
return nullptr;
}
Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
ScalarEvolution *SE,
const DataLayout& DL) const {
SCEVExpander Exp(*SE, DL, "induction");
switch (IK) {
case IK_IntInduction:
case IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
"Index type does not match StartValue type");
if (StepValue->isMinusOne())
// FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
// and calculate (Start + Index * Step) for all cases, without
// special handling for "isOne" and "isMinusOne".
// But in the real life the result code getting worse. We mix SCEV
// expressions and ADD/SUB operations and receive redundant
// intermediate values being calculated in different ways and
// Instcombine is unable to reduce them all.
if (getConstIntStepValue() &&
getConstIntStepValue()->isMinusOne())
return B.CreateSub(StartValue, Index);
if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
if (getConstIntStepValue() &&
getConstIntStepValue()->isOne())
return B.CreateAdd(StartValue, Index);
case IK_PtrInduction:
assert(Index->getType() == StepValue->getType() &&
const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
SE->getMulExpr(Step, SE->getSCEV(Index)));
return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
}
case IK_PtrInduction: {
assert(Index->getType() == Step->getType() &&
"Index type does not match StepValue type");
if (StepValue->isMinusOne())
Index = B.CreateNeg(Index);
else if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
return B.CreateGEP(nullptr, StartValue, Index);
}
case IK_NoInduction:
return nullptr;
}
@ -746,17 +779,22 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C)
// The stride may be a constant or a loop invariant integer value.
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
return false;
ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
D = InductionDescriptor(StartValue, IK_IntInduction, CV);
D = InductionDescriptor(StartValue, IK_IntInduction, Step);
return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
// Pointer induction should be a constant.
if (!ConstStep)
return false;
ConstantInt *CV = ConstStep->getValue();
Type *PointerElementType = PhiTy->getPointerElementType();
// The pointer stride cannot be determined if the pointer element type is not
// sized.
@ -771,8 +809,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
true /* signed */);
D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
return true;
}

View File

@ -416,6 +416,12 @@ protected:
/// to each vector element of Val. The sequence starts at StartIndex.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// Step is a SCEV. In order to get StepValue it takes the existing value
/// from SCEV or creates a new using SCEVExpander.
virtual Value *getStepVector(Value *Val, int StartIdx, const SCEV *Step);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
/// When we widen (vectorize) values we place them in the map. If the values
@ -600,6 +606,7 @@ private:
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
Value *getStepVector(Value *Val, int StartIdx, const SCEV *StepSCEV) override;
Value *reverseVector(Value *Vec) override;
};
@ -2075,6 +2082,15 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
const SCEV *StepSCEV) {
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
SCEVExpander Exp(*PSE.getSE(), DL, "induction");
Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
&*Builder.GetInsertPoint());
return getStepVector(Val, StartIdx, StepValue);
}
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
Value *Step) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
@ -3141,9 +3157,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
EndValue = CountRoundDown;
} else {
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
Value *CRD = B.CreateSExtOrTrunc(
CountRoundDown, II.getStepValue()->getType(), "cast.crd");
EndValue = II.transform(B, CRD);
Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
II.getStep()->getType(), "cast.crd");
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
EndValue = II.transform(B, CRD, PSE.getSE(), DL);
EndValue->setName("ind.end");
}
@ -4035,6 +4052,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
assert(Legal->getInductionVars()->count(P) && "Not an induction variable");
InductionDescriptor II = Legal->getInductionVars()->lookup(P);
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
@ -4048,14 +4066,14 @@ void InnerLoopVectorizer::widenPHIInstruction(
Value *V = Induction;
if (P != OldInduction) {
V = Builder.CreateSExtOrTrunc(Induction, P->getType());
V = II.transform(Builder, V);
V = II.transform(Builder, V, PSE.getSE(), DL);
V->setName("offset.idx");
}
Value *Broadcasted = getBroadcastInstrs(V);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue());
Entry[part] = getStepVector(Broadcasted, VF * part, II.getStep());
return;
}
case InductionDescriptor::IK_PtrInduction:
@ -4063,7 +4081,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = Induction;
PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
@ -4071,7 +4089,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
int EltIndex = part;
Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
Entry[part] = SclrGep;
continue;
@ -4082,7 +4100,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
int EltIndex = i + part * VF;
Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i), "insert.gep");
@ -4218,24 +4236,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
setDebugLocFromInst(Builder, &*it);
/// Optimize the special case where the source is the induction
/// variable. Notice that we can only optimize the 'trunc' case
/// Optimize the special case where the source is a constant integer
/// induction variable. Notice that we can only optimize the 'trunc' case
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
/// c. other casts depend on pointer size.
if (CI->getOperand(0) == OldInduction &&
it->getOpcode() == Instruction::Trunc) {
Value *ScalarCast =
Builder.CreateCast(CI->getOpcode(), Induction, CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
InductionDescriptor II =
Legal->getInductionVars()->lookup(OldInduction);
Constant *Step = ConstantInt::getSigned(
CI->getType(), II.getStepValue()->getSExtValue());
if (auto StepValue = II.getConstIntStepValue()) {
StepValue = ConstantInt::getSigned(cast<IntegerType>(CI->getType()),
StepValue->getSExtValue());
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
Entry[Part] = getStepVector(Broadcasted, VF * Part, StepValue);
addMetadata(Entry, &*it);
break;
}
}
/// Vectorize casts.
Type *DestTy =
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
@ -4596,9 +4617,11 @@ bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
// Int inductions are special because we only allow one IV.
if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
ID.getStepValue()->isOne() &&
ID.getConstIntStepValue() &&
ID.getConstIntStepValue()->isOne() &&
isa<Constant>(ID.getStartValue()) &&
cast<Constant>(ID.getStartValue())->isNullValue()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient). We've checked that it begins at zero and
@ -6235,6 +6258,15 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx,
const SCEV *StepSCEV) {
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
SCEVExpander Exp(*PSE.getSE(), DL, "induction");
Value *StepValue = Exp.expandCodeFor(StepSCEV, StepSCEV->getType(),
&*Builder.GetInsertPoint());
return getStepVector(Val, StartIdx, StepValue);
}
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType();

View File

@ -0,0 +1,124 @@
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -S | FileCheck %s
; int int_inc;
;
;int induction_with_global(int init, int *restrict A, int N) {
; int x = init;
; for (int i=0;i<N;i++){
; A[i] = x;
; x += int_inc;
; }
; return x;
;}
; CHECK-LABEL: @induction_with_global(
; CHECK: %[[INT_INC:.*]] = load i32, i32* @int_inc, align 4
; CHECK: vector.body:
; CHECK: %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INT_INC]], i32 0
; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK: mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@int_inc = common global i32 0, align 4
define i32 @induction_with_global(i32 %init, i32* noalias nocapture %A, i32 %N) {
entry:
%cmp4 = icmp sgt i32 %N, 0
br i1 %cmp4, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
%0 = load i32, i32* @int_inc, align 4
%1 = mul i32 %0, %N
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%x.05 = phi i32 [ %init, %for.body.lr.ph ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %x.05, i32* %arrayidx, align 4
%add = add nsw i32 %0, %x.05
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit: ; preds = %for.body
%2 = add i32 %1, %init
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
%x.0.lcssa = phi i32 [ %init, %entry ], [ %2, %for.end.loopexit ]
ret i32 %x.0.lcssa
}
;int induction_with_loop_inv(int init, int *restrict A, int N, int M) {
; int x = init;
; for (int j = 0; j < M; j++) {
; for (int i=0; i<N; i++){
; A[i] = x;
; x += j; // induction step is a loop invariant variable
; }
; }
; return x;
;}
; CHECK-LABEL: @induction_with_loop_inv(
; CHECK: for.cond1.preheader:
; CHECK: %[[INDVAR0:.*]] = phi i32 [ 0,
; CHECK: %[[INDVAR1:.*]] = phi i32 [ 0,
; CHECK: vector.body:
; CHECK: %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INDVAR1]], i32 0
; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK: mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
define i32 @induction_with_loop_inv(i32 %init, i32* noalias nocapture %A, i32 %N, i32 %M) {
entry:
%cmp10 = icmp sgt i32 %M, 0
br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %for.end6
for.cond1.preheader.lr.ph: ; preds = %entry
%cmp27 = icmp sgt i32 %N, 0
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc4, %for.cond1.preheader.lr.ph
%indvars.iv15 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next16, %for.inc4 ]
%j.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc5, %for.inc4 ]
%x.011 = phi i32 [ %init, %for.cond1.preheader.lr.ph ], [ %x.1.lcssa, %for.inc4 ]
br i1 %cmp27, label %for.body3.preheader, label %for.inc4
for.body3.preheader: ; preds = %for.cond1.preheader
br label %for.body3
for.body3: ; preds = %for.body3.preheader, %for.body3
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
%x.18 = phi i32 [ %add, %for.body3 ], [ %x.011, %for.body3.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %x.18, i32* %arrayidx, align 4
%add = add nsw i32 %x.18, %j.012
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.inc4.loopexit, label %for.body3
for.inc4.loopexit: ; preds = %for.body3
%0 = add i32 %x.011, %indvars.iv15
br label %for.inc4
for.inc4: ; preds = %for.inc4.loopexit, %for.cond1.preheader
%x.1.lcssa = phi i32 [ %x.011, %for.cond1.preheader ], [ %0, %for.inc4.loopexit ]
%inc5 = add nuw nsw i32 %j.012, 1
%indvars.iv.next16 = add i32 %indvars.iv15, %N
%exitcond17 = icmp eq i32 %inc5, %M
br i1 %exitcond17, label %for.end6.loopexit, label %for.cond1.preheader
for.end6.loopexit: ; preds = %for.inc4
%x.1.lcssa.lcssa = phi i32 [ %x.1.lcssa, %for.inc4 ]
br label %for.end6
for.end6: ; preds = %for.end6.loopexit, %entry
%x.0.lcssa = phi i32 [ %init, %entry ], [ %x.1.lcssa.lcssa, %for.end6.loopexit ]
ret i32 %x.0.lcssa
}