Refactor 'vectorizeLoop' no functionality change.

This patch merges LoopVectorize of InnerLoopVectorizer and InnerLoopUnroller by adding checks for VF=1. This helps in erasing the Unroller code that is almost identical to the InnerLoopVectorizer code. llvm-svn: 189391
2013-08-27 18:52:47 +00:00 · 2013-08-27 18:52:47 +00:00 · 6b41f7cc4c
parent 047c61510f
commit 6b41f7cc4c
1 changed files with 50 additions and 184 deletions
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -354,7 +354,6 @@ public:
    InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
 private:
  virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
  virtual void scalarizeInstruction(Instruction *Instr);
  virtual void vectorizeMemoryInstruction(Instruction *Instr,
                                          LoopVectorizationLegality *Legal);
@ -2049,18 +2048,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
      // MinMax reduction have the start value as their identify.
-      VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+      if (VF == 1) {
-                                                         "minmax.ident");
+        VectorStart = Identity = RdxDesc.StartValue;
      } else {
        VectorStart = Identity = Builder.CreateVectorSplat(VF,
                                                           RdxDesc.StartValue,
                                                           "minmax.ident");
      }
    } else {
      // Handle other reduction kinds:
      Constant *Iden =
-        LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+      LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
-                                                        VecTy->getScalarType());
+                                                      VecTy->getScalarType());
-      Identity = ConstantVector::getSplat(VF, Iden);
+      if (VF == 1) {
        Identity = Iden;
        // This vector is the Identity vector where the first element is the
        // incoming scalar reduction.
        VectorStart = RdxDesc.StartValue;
      } else {
        Identity = ConstantVector::getSplat(VF, Iden);
-      // This vector is the Identity vector where the first element is the
+        // This vector is the Identity vector where the first element is the
-      // incoming scalar reduction.
+        // incoming scalar reduction.
-      VectorStart = Builder.CreateInsertElement(Identity,
+        VectorStart = Builder.CreateInsertElement(Identity,
-                                                RdxDesc.StartValue, Zero);
+                                                  RdxDesc.StartValue, Zero);
      }
    }
    // Fix the vector-loop phi.
@ -2116,37 +2128,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
                                        ReducedPartRdx, RdxParts[part]);
    }
-    // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+    if (VF > 1) {
-    // and vector ops, reducing the set of values being computed by half each
+      // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
-    // round.
+      // and vector ops, reducing the set of values being computed by half each
-    assert(isPowerOf2_32(VF) &&
+      // round.
-           "Reduction emission only supported for pow2 vectors!");
+      assert(isPowerOf2_32(VF) &&
-    Value *TmpVec = ReducedPartRdx;
+             "Reduction emission only supported for pow2 vectors!");
-    SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+      Value *TmpVec = ReducedPartRdx;
-    for (unsigned i = VF; i != 1; i >>= 1) {
+      SmallVector<Constant*, 32> ShuffleMask(VF, 0);
-      // Move the upper half of the vector to the lower half.
+      for (unsigned i = VF; i != 1; i >>= 1) {
-      for (unsigned j = 0; j != i/2; ++j)
+        // Move the upper half of the vector to the lower half.
-        ShuffleMask[j] = Builder.getInt32(i/2 + j);
+        for (unsigned j = 0; j != i/2; ++j)
          ShuffleMask[j] = Builder.getInt32(i/2 + j);
-      // Fill the rest of the mask with undef.
+        // Fill the rest of the mask with undef.
-      std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+        std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
-                UndefValue::get(Builder.getInt32Ty()));
+                  UndefValue::get(Builder.getInt32Ty()));
-      Value *Shuf =
+        Value *Shuf =
        Builder.CreateShuffleVector(TmpVec,
                                    UndefValue::get(TmpVec->getType()),
                                    ConstantVector::get(ShuffleMask),
                                    "rdx.shuf");
-      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
-        TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+          TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
-                                     "bin.rdx");
+                                       "bin.rdx");
-      else
+        else
-        TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+          TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
-    }
+      }
-    // The result is in the first element of the vector.
+      // The result is in the first element of the vector.
-    Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+      ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
                                                    Builder.getInt32(0));
    }
    // Now, we need to fix the users of the reduction variable
    // inside and outside of the scalar remainder loop.
@ -2165,7 +2180,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
      // incoming bypass edge.
      if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
        // Add an edge coming from the bypass.
-        LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+        LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
        break;
      }
    }// end of the LCSSA phi scan.
@ -2177,7 +2192,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
    assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
    // Pick the other block.
    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
-    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
    (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
  }// end of for each redux variable.
@ -4788,155 +4803,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
  return false;
 }
 void
 InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) {
  // In order to support reduction variables we need to be able to unroll
  // Phi nodes. Phi nodes have cycles, so we need to unroll them in two
  // stages. See InnerLoopVectorizer::vectorizeLoop for more details.
  PhiVector RdxPHIsToFix;
  // Scan the loop in a topological order to ensure that defs are vectorized
  // before users.
  LoopBlocksDFS DFS(OrigLoop);
  DFS.perform(LI);
  // Unroll all of the blocks in the original loop.
  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
       bb != be; ++bb)
    vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
  // Create the 'reduced' values for each of the induction vars.
  // The reduced values are the vector values that we scalarize and combine
  // after the loop is finished.
  for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
       it != e; ++it) {
    PHINode *RdxPhi = *it;
    assert(RdxPhi && "Unable to recover vectorized PHI");
    // Find the reduction variable descriptor.
    assert(Legal->getReductionVars()->count(RdxPhi) &&
           "Unable to find the reduction variable");
    LoopVectorizationLegality::ReductionDescriptor RdxDesc =
    (*Legal->getReductionVars())[RdxPhi];
    setDebugLocFromInst(Builder, RdxDesc.StartValue);
    // We need to generate a reduction vector from the incoming scalar.
    // To do so, we need to generate the 'identity' vector and overide
    // one of the elements with the incoming scalar reduction. We need
    // to do it in the vector-loop preheader.
    Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
    // This is the vector-clone of the value that leaves the loop.
    VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
    Type *VecTy = VectorExit[0]->getType();
    // Find the reduction identity variable. Zero for addition, or, xor,
    // one for multiplication, -1 for And.
    Value *Identity;
    Value *VectorStart;
    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
      // MinMax reduction have the start value as their identify.
      VectorStart = Identity = RdxDesc.StartValue;
    } else {
      Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
                                                        VecTy->getScalarType());
      // This vector is the Identity vector where the first element is the
      // incoming scalar reduction.
      VectorStart = RdxDesc.StartValue;
    }
    // Fix the vector-loop phi.
    // We created the induction variable so we know that the
    // preheader is the first entry.
    BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
    // Reductions do not have to start at zero. They can start with
    // any loop invariant values.
    VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
    BasicBlock *Latch = OrigLoop->getLoopLatch();
    Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
    VectorParts &Val = getVectorValue(LoopVal);
    for (unsigned part = 0; part < UF; ++part) {
      // Make sure to add the reduction stat value only to the
      // first unroll part.
      Value *StartVal = (part == 0) ? VectorStart : Identity;
      cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
      cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
    }
    // Before each round, move the insertion point right between
    // the PHIs and the values we are going to write.
    // This allows us to write both PHINodes and the extractelement
    // instructions.
    Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
    VectorParts RdxParts;
    setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
    for (unsigned part = 0; part < UF; ++part) {
      // This PHINode contains the vectorized reduction variable, or
      // the initial value vector, if we bypass the vector loop.
      VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
      PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
      Value *StartVal = (part == 0) ? VectorStart : Identity;
      for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
        NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
      NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
      RdxParts.push_back(NewPhi);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = getReductionBinOp(RdxDesc.Kind);
    setDebugLocFromInst(Builder, ReducedPartRdx);
    for (unsigned part = 1; part < UF; ++part) {
      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
        ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                             RdxParts[part], ReducedPartRdx,
                                             "bin.rdx");
      else
        ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
                                        ReducedPartRdx, RdxParts[part]);
    }
    // Now, we need to fix the users of the reduction variable
    // inside and outside of the scalar remainder loop.
    // We know that the loop is in LCSSA form. We need to update the
    // PHI nodes in the exit blocks.
    for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
         LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
      PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
      if (!LCSSAPhi) continue;
      // All PHINodes need to have a single entry edge, or two if
      // we already fixed them.
      assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
      // We found our reduction value exit-PHI. Update it with the
      // incoming bypass edge.
      if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
        // Add an edge coming from the bypass.
        LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
        break;
      }
    }// end of the LCSSA phi scan.
    // Fix the scalar loop reduction variable with the incoming reduction sum
    // from the vector body and from the backedge value.
    int IncomingEdgeBlockIdx =
    (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
    assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
    // Pick the other block.
    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
    (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
  }// end of for each redux variable.
  fixLCSSAPHIs();
 }
 void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");