Refactor 'vectorizeLoop'; no functionality change.
This patch merges the vectorizeLoop implementations of InnerLoopVectorizer and InnerLoopUnroller by adding checks for VF == 1. This allows us to erase the Unroller code, which is almost identical to the InnerLoopVectorizer code. llvm-svn: 189391
This commit is contained in:
parent
047c61510f
commit
6b41f7cc4c
|
@ -354,7 +354,6 @@ public:
|
||||||
InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
|
InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
|
|
||||||
virtual void scalarizeInstruction(Instruction *Instr);
|
virtual void scalarizeInstruction(Instruction *Instr);
|
||||||
virtual void vectorizeMemoryInstruction(Instruction *Instr,
|
virtual void vectorizeMemoryInstruction(Instruction *Instr,
|
||||||
LoopVectorizationLegality *Legal);
|
LoopVectorizationLegality *Legal);
|
||||||
|
@ -2049,18 +2048,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||||
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
|
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
|
||||||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
|
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
|
||||||
// MinMax reduction have the start value as their identify.
|
// MinMax reduction have the start value as their identify.
|
||||||
VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
|
if (VF == 1) {
|
||||||
"minmax.ident");
|
VectorStart = Identity = RdxDesc.StartValue;
|
||||||
|
} else {
|
||||||
|
VectorStart = Identity = Builder.CreateVectorSplat(VF,
|
||||||
|
RdxDesc.StartValue,
|
||||||
|
"minmax.ident");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// Handle other reduction kinds:
|
||||||
Constant *Iden =
|
Constant *Iden =
|
||||||
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
|
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
|
||||||
VecTy->getScalarType());
|
VecTy->getScalarType());
|
||||||
Identity = ConstantVector::getSplat(VF, Iden);
|
if (VF == 1) {
|
||||||
|
Identity = Iden;
|
||||||
|
// This vector is the Identity vector where the first element is the
|
||||||
|
// incoming scalar reduction.
|
||||||
|
VectorStart = RdxDesc.StartValue;
|
||||||
|
} else {
|
||||||
|
Identity = ConstantVector::getSplat(VF, Iden);
|
||||||
|
|
||||||
// This vector is the Identity vector where the first element is the
|
// This vector is the Identity vector where the first element is the
|
||||||
// incoming scalar reduction.
|
// incoming scalar reduction.
|
||||||
VectorStart = Builder.CreateInsertElement(Identity,
|
VectorStart = Builder.CreateInsertElement(Identity,
|
||||||
RdxDesc.StartValue, Zero);
|
RdxDesc.StartValue, Zero);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fix the vector-loop phi.
|
// Fix the vector-loop phi.
|
||||||
|
@ -2116,37 +2128,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||||
ReducedPartRdx, RdxParts[part]);
|
ReducedPartRdx, RdxParts[part]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
|
if (VF > 1) {
|
||||||
// and vector ops, reducing the set of values being computed by half each
|
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
|
||||||
// round.
|
// and vector ops, reducing the set of values being computed by half each
|
||||||
assert(isPowerOf2_32(VF) &&
|
// round.
|
||||||
"Reduction emission only supported for pow2 vectors!");
|
assert(isPowerOf2_32(VF) &&
|
||||||
Value *TmpVec = ReducedPartRdx;
|
"Reduction emission only supported for pow2 vectors!");
|
||||||
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
|
Value *TmpVec = ReducedPartRdx;
|
||||||
for (unsigned i = VF; i != 1; i >>= 1) {
|
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
|
||||||
// Move the upper half of the vector to the lower half.
|
for (unsigned i = VF; i != 1; i >>= 1) {
|
||||||
for (unsigned j = 0; j != i/2; ++j)
|
// Move the upper half of the vector to the lower half.
|
||||||
ShuffleMask[j] = Builder.getInt32(i/2 + j);
|
for (unsigned j = 0; j != i/2; ++j)
|
||||||
|
ShuffleMask[j] = Builder.getInt32(i/2 + j);
|
||||||
|
|
||||||
// Fill the rest of the mask with undef.
|
// Fill the rest of the mask with undef.
|
||||||
std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
|
std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
|
||||||
UndefValue::get(Builder.getInt32Ty()));
|
UndefValue::get(Builder.getInt32Ty()));
|
||||||
|
|
||||||
Value *Shuf =
|
Value *Shuf =
|
||||||
Builder.CreateShuffleVector(TmpVec,
|
Builder.CreateShuffleVector(TmpVec,
|
||||||
UndefValue::get(TmpVec->getType()),
|
UndefValue::get(TmpVec->getType()),
|
||||||
ConstantVector::get(ShuffleMask),
|
ConstantVector::get(ShuffleMask),
|
||||||
"rdx.shuf");
|
"rdx.shuf");
|
||||||
|
|
||||||
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
||||||
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
|
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
|
||||||
"bin.rdx");
|
"bin.rdx");
|
||||||
else
|
else
|
||||||
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
|
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The result is in the first element of the vector.
|
// The result is in the first element of the vector.
|
||||||
Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
|
ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
|
||||||
|
Builder.getInt32(0));
|
||||||
|
}
|
||||||
|
|
||||||
// Now, we need to fix the users of the reduction variable
|
// Now, we need to fix the users of the reduction variable
|
||||||
// inside and outside of the scalar remainder loop.
|
// inside and outside of the scalar remainder loop.
|
||||||
|
@ -2165,7 +2180,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||||
// incoming bypass edge.
|
// incoming bypass edge.
|
||||||
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
|
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
|
||||||
// Add an edge coming from the bypass.
|
// Add an edge coming from the bypass.
|
||||||
LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
|
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}// end of the LCSSA phi scan.
|
}// end of the LCSSA phi scan.
|
||||||
|
@ -2177,7 +2192,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||||
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
|
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
|
||||||
// Pick the other block.
|
// Pick the other block.
|
||||||
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
|
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
|
||||||
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
|
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
|
||||||
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
|
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
|
||||||
}// end of for each redux variable.
|
}// end of for each redux variable.
|
||||||
|
|
||||||
|
@ -4788,155 +4803,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|
||||||
// In order to support reduction variables we need to be able to unroll
|
|
||||||
// Phi nodes. Phi nodes have cycles, so we need to unroll them in two
|
|
||||||
// stages. See InnerLoopVectorizer::vectorizeLoop for more details.
|
|
||||||
PhiVector RdxPHIsToFix;
|
|
||||||
|
|
||||||
// Scan the loop in a topological order to ensure that defs are vectorized
|
|
||||||
// before users.
|
|
||||||
LoopBlocksDFS DFS(OrigLoop);
|
|
||||||
DFS.perform(LI);
|
|
||||||
|
|
||||||
// Unroll all of the blocks in the original loop.
|
|
||||||
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
|
|
||||||
bb != be; ++bb)
|
|
||||||
vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
|
|
||||||
|
|
||||||
// Create the 'reduced' values for each of the induction vars.
|
|
||||||
// The reduced values are the vector values that we scalarize and combine
|
|
||||||
// after the loop is finished.
|
|
||||||
for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
|
|
||||||
it != e; ++it) {
|
|
||||||
PHINode *RdxPhi = *it;
|
|
||||||
assert(RdxPhi && "Unable to recover vectorized PHI");
|
|
||||||
|
|
||||||
// Find the reduction variable descriptor.
|
|
||||||
assert(Legal->getReductionVars()->count(RdxPhi) &&
|
|
||||||
"Unable to find the reduction variable");
|
|
||||||
LoopVectorizationLegality::ReductionDescriptor RdxDesc =
|
|
||||||
(*Legal->getReductionVars())[RdxPhi];
|
|
||||||
|
|
||||||
setDebugLocFromInst(Builder, RdxDesc.StartValue);
|
|
||||||
|
|
||||||
// We need to generate a reduction vector from the incoming scalar.
|
|
||||||
// To do so, we need to generate the 'identity' vector and overide
|
|
||||||
// one of the elements with the incoming scalar reduction. We need
|
|
||||||
// to do it in the vector-loop preheader.
|
|
||||||
Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
|
|
||||||
|
|
||||||
// This is the vector-clone of the value that leaves the loop.
|
|
||||||
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
|
|
||||||
Type *VecTy = VectorExit[0]->getType();
|
|
||||||
|
|
||||||
// Find the reduction identity variable. Zero for addition, or, xor,
|
|
||||||
// one for multiplication, -1 for And.
|
|
||||||
Value *Identity;
|
|
||||||
Value *VectorStart;
|
|
||||||
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
|
|
||||||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
|
|
||||||
// MinMax reduction have the start value as their identify.
|
|
||||||
VectorStart = Identity = RdxDesc.StartValue;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
|
|
||||||
VecTy->getScalarType());
|
|
||||||
|
|
||||||
// This vector is the Identity vector where the first element is the
|
|
||||||
// incoming scalar reduction.
|
|
||||||
VectorStart = RdxDesc.StartValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fix the vector-loop phi.
|
|
||||||
// We created the induction variable so we know that the
|
|
||||||
// preheader is the first entry.
|
|
||||||
BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
|
|
||||||
|
|
||||||
// Reductions do not have to start at zero. They can start with
|
|
||||||
// any loop invariant values.
|
|
||||||
VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
|
|
||||||
BasicBlock *Latch = OrigLoop->getLoopLatch();
|
|
||||||
Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
|
|
||||||
VectorParts &Val = getVectorValue(LoopVal);
|
|
||||||
for (unsigned part = 0; part < UF; ++part) {
|
|
||||||
// Make sure to add the reduction stat value only to the
|
|
||||||
// first unroll part.
|
|
||||||
Value *StartVal = (part == 0) ? VectorStart : Identity;
|
|
||||||
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
|
|
||||||
cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Before each round, move the insertion point right between
|
|
||||||
// the PHIs and the values we are going to write.
|
|
||||||
// This allows us to write both PHINodes and the extractelement
|
|
||||||
// instructions.
|
|
||||||
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
|
|
||||||
|
|
||||||
VectorParts RdxParts;
|
|
||||||
setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
|
|
||||||
for (unsigned part = 0; part < UF; ++part) {
|
|
||||||
// This PHINode contains the vectorized reduction variable, or
|
|
||||||
// the initial value vector, if we bypass the vector loop.
|
|
||||||
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
|
|
||||||
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
|
|
||||||
Value *StartVal = (part == 0) ? VectorStart : Identity;
|
|
||||||
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
|
|
||||||
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
|
|
||||||
NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
|
|
||||||
RdxParts.push_back(NewPhi);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reduce all of the unrolled parts into a single vector.
|
|
||||||
Value *ReducedPartRdx = RdxParts[0];
|
|
||||||
unsigned Op = getReductionBinOp(RdxDesc.Kind);
|
|
||||||
setDebugLocFromInst(Builder, ReducedPartRdx);
|
|
||||||
for (unsigned part = 1; part < UF; ++part) {
|
|
||||||
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
|
||||||
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
|
|
||||||
RdxParts[part], ReducedPartRdx,
|
|
||||||
"bin.rdx");
|
|
||||||
else
|
|
||||||
ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
|
|
||||||
ReducedPartRdx, RdxParts[part]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now, we need to fix the users of the reduction variable
|
|
||||||
// inside and outside of the scalar remainder loop.
|
|
||||||
// We know that the loop is in LCSSA form. We need to update the
|
|
||||||
// PHI nodes in the exit blocks.
|
|
||||||
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
|
|
||||||
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
|
|
||||||
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
|
|
||||||
if (!LCSSAPhi) continue;
|
|
||||||
|
|
||||||
// All PHINodes need to have a single entry edge, or two if
|
|
||||||
// we already fixed them.
|
|
||||||
assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
|
|
||||||
|
|
||||||
// We found our reduction value exit-PHI. Update it with the
|
|
||||||
// incoming bypass edge.
|
|
||||||
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
|
|
||||||
// Add an edge coming from the bypass.
|
|
||||||
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}// end of the LCSSA phi scan.
|
|
||||||
|
|
||||||
// Fix the scalar loop reduction variable with the incoming reduction sum
|
|
||||||
// from the vector body and from the backedge value.
|
|
||||||
int IncomingEdgeBlockIdx =
|
|
||||||
(RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
|
|
||||||
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
|
|
||||||
// Pick the other block.
|
|
||||||
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
|
|
||||||
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
|
|
||||||
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
|
|
||||||
}// end of for each redux variable.
|
|
||||||
|
|
||||||
fixLCSSAPHIs();
|
|
||||||
}
|
|
||||||
|
|
||||||
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
|
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
|
||||||
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
|
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
|
||||||
|
|
Loading…
Reference in New Issue