[TargetTransformInfo] Refactor and improve getScalarizationOverhead()

Refactoring to remove duplicated copies of this method.

A new method, getOperandsScalarizationOverhead(), looks at the unique operands
actually present and adds extract costs for them. The old behaviour was to
always add extract costs for one operand of the vector type; this still happens
in getArithmeticInstrCost() if the caller provides no operands.
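
To make the difference concrete, here is a rough accounting sketch with made-up
unit costs (each insert/extract and each scalar add is assumed to cost 1; the
numbers are illustrative only, not taken from any real cost table) for
scalarizing a <4 x i32> add whose two vector operands are distinct values:

// Illustrative numbers only: assume every insertelement/extractelement and
// every scalar add has cost 1, and consider a <4 x i32> add whose two
// vector operands are distinct values.
unsigned sketchScalarizedAddCost(bool HaveArgs) {
  unsigned Num = 4;                         // vector elements
  unsigned ScalarOpCost = 1;                // assumed cost of one scalar add
  unsigned InsertResult = Num;              // build the result vector
  unsigned ExtractOneOperand = Num;         // old one-operand heuristic
  unsigned ExtractUniqueOperands = 2 * Num; // new: one set per unique operand

  // Old accounting: getScalarizationOverhead(Ty, true, true) + Num * Cost = 12.
  // New accounting: 4 + 8 + 4 = 16 when the operands are known, and
  // 4 + 4 + 4 = 12 (the old one-operand heuristic) when they are not.
  return InsertResult +
         (HaveArgs ? ExtractUniqueOperands : ExtractOneOperand) +
         Num * ScalarOpCost;
}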

This is a good start, but there are more places that could be improved by
using getOperandsScalarizationOverhead().
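
As a minimal usage sketch, essentially what the updated helper in
LoopVectorize.cpp does after this patch (the function name below is invented,
and ToVectorTy() is the existing LoopVectorize utility), a cost-model client
that knows an instruction's operands can query the new hooks like this:

// Sketch only; assumes llvm/Analysis/TargetTransformInfo.h and the usual IR
// headers are included, and that ToVectorTy() is available in this file.
static unsigned getScalarizationCostSketch(Instruction *I, unsigned VF,
                                           const TargetTransformInfo &TTI) {
  if (VF == 1)
    return 0; // Nothing is vectorized, so nothing needs scalarizing.

  unsigned Cost = 0;

  // Cost of inserting the scalar results back into a vector.
  Type *RetTy = ToVectorTy(I->getType(), VF);
  if (!RetTy->isVoidTy())
    Cost += TTI.getScalarizationOverhead(RetTy, /*Insert=*/true,
                                         /*Extract=*/false);

  // Cost of extracting the scalar operands, counted once per unique operand
  // value instead of once per operand slot.
  SmallVector<const Value *, 4> Operands(I->operand_values());
  Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);

  return Cost;
}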

Review: Hal Finkel
https://reviews.llvm.org/D29017

llvm-svn: 293155
Jonas Paulsson 2017-01-26 07:03:25 +00:00
parent 3a3c09c5dd
commit 8e2f948ef0
9 changed files with 92 additions and 79 deletions

include/llvm/Analysis/TargetTransformInfo.h

@@ -411,6 +411,11 @@ public:
   /// containing this constant value for the target.
   bool shouldBuildLookupTablesForConstant(Constant *C) const;
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) const;
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
@@ -743,6 +748,10 @@ public:
   virtual unsigned getJumpBufSize() = 0;
   virtual bool shouldBuildLookupTables() = 0;
   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+  virtual unsigned
+  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                                    unsigned VF) = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -933,6 +942,14 @@ public:
   bool shouldBuildLookupTablesForConstant(Constant *C) override {
     return Impl.shouldBuildLookupTablesForConstant(C);
   }
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+  }
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) {
+    return Impl.getOperandsScalarizationOverhead(Args, VF);
+  }
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }

include/llvm/Analysis/TargetTransformInfoImpl.h

@@ -251,6 +251,13 @@ public:
   bool shouldBuildLookupTables() { return true; }
   bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return 0;
+  }
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) { return 0; }
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
   bool enableInterleavedAccessVectorization() { return false; }

include/llvm/CodeGen/BasicTTIImpl.h

@@ -42,24 +42,6 @@ private:
   typedef TargetTransformInfoImplCRTPBase<T> BaseT;
   typedef TargetTransformInfo TTI;
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-    assert(Ty->isVectorTy() && "Can only scalarize vectors");
-    unsigned Cost = 0;
-    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-      if (Insert)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
-      if (Extract)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-    }
-    return Cost;
-  }
   /// Estimate a cost of shuffle as a sequence of extract and insert
   /// operations.
   unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -301,6 +283,37 @@ public:
   unsigned getRegisterBitWidth(bool Vector) { return 32; }
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    assert(Ty->isVectorTy() && "Can only scalarize vectors");
+    unsigned Cost = 0;
+    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+      if (Insert)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+      if (Extract)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+    }
+    return Cost;
+  }
+  /// Estimate the overhead of scalarizing an instructions unique operands.
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) {
+    unsigned Cost = 0;
+    SmallPtrSet<const Value*, 4> UniqueOperands;
+    for (const Value *A : Args) {
+      if (UniqueOperands.insert(A).second)
+        Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF),
+                                         false, true);
+    }
+    return Cost;
+  }
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
   unsigned getArithmeticInstrCost(
@@ -341,10 +354,17 @@ public:
       unsigned Num = Ty->getVectorNumElements();
       unsigned Cost = static_cast<T *>(this)
                           ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
-      // return the cost of multiple scalar invocation plus the cost of
-      // inserting
-      // and extracting the values.
-      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+      // Return the cost of multiple scalar invocation plus the cost of
+      // inserting and extracting the values.
+      unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost;
+      if (!Args.empty())
+        TotCost += getOperandsScalarizationOverhead(Args, Num);
+      else
+        // When no information on arguments is provided, we add the cost
+        // associated with one argument as a heuristic.
+        TotCost += getScalarizationOverhead(Ty, false, true);
+      return TotCost;
     }
     // We don't know anything about this scalar instruction.

lib/Analysis/TargetTransformInfo.cpp

@@ -182,6 +182,17 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
   return TTIImpl->shouldBuildLookupTablesForConstant(C);
 }
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                 unsigned VF) const {
+  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }

lib/Target/AArch64/AArch64TargetTransformInfo.h

@@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   const AArch64Subtarget *ST;
   const AArch64TargetLowering *TLI;
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
   const AArch64Subtarget *getST() const { return ST; }
   const AArch64TargetLowering *getTLI() const { return TLI; }

lib/Target/ARM/ARMTargetTransformInfo.h

@@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   const ARMSubtarget *ST;
   const ARMTargetLowering *TLI;
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
   const ARMSubtarget *getST() const { return ST; }
   const ARMTargetLowering *getTLI() const { return TLI; }

lib/Target/X86/X86TargetTransformInfo.cpp

@@ -1577,20 +1577,6 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-  assert (Ty->isVectorTy() && "Can only scalarize vectors");
-  int Cost = 0;
-  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-    if (Insert)
-      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
-    if (Extract)
-      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-  }
-  return Cost;
-}
 int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
   // Handle non-power-of-two vectors such as <3 x float>

lib/Target/X86/X86TargetTransformInfo.h

@@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   const X86Subtarget *ST;
   const X86TargetLowering *TLI;
-  int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
   const X86Subtarget *getST() const { return ST; }
   const X86TargetLowering *getTLI() const { return TLI; }

lib/Transforms/Vectorize/LoopVectorize.cpp

@@ -3598,37 +3598,18 @@ static Value *addFastMathFlag(Value *V) {
   return V;
 }
-/// \brief Estimate the overhead of scalarizing a value based on its type.
-/// Insert and Extract are set if the result needs to be inserted and/or
-/// extracted from vectors.
-static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
-                                         const TargetTransformInfo &TTI) {
-  if (Ty->isVoidTy())
-    return 0;
-  assert(Ty->isVectorTy() && "Can only scalarize vectors");
-  unsigned Cost = 0;
-  for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
-    if (Extract)
-      Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
-    if (Insert)
-      Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
-  }
-  return Cost;
-}
 /// \brief Estimate the overhead of scalarizing an Instruction based on the
 /// types of its operands and return value.
 static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
                                          Type *RetTy,
                                          const TargetTransformInfo &TTI) {
-  unsigned ScalarizationCost =
-      getScalarizationOverhead(RetTy, true, false, TTI);
+  unsigned ScalarizationCost = 0;
+  if (!RetTy->isVoidTy())
+    ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false);
   for (Type *Ty : OpTys)
-    ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+    ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true);
   return ScalarizationCost;
 }
@@ -3640,14 +3621,15 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
   if (VF == 1)
     return 0;
+  unsigned Cost = 0;
   Type *RetTy = ToVectorTy(I->getType(), VF);
+  if (!RetTy->isVoidTy())
+    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-  SmallVector<Type *, 4> OpTys;
-  unsigned OperandsNum = I->getNumOperands();
-  for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
-    OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
+  SmallVector<const Value *, 4> Operands(I->operand_values());
+  Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  return getScalarizationOverhead(OpTys, RetTy, TTI);
+  return Cost;
 }
@@ -6713,8 +6695,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
     // Compute the scalarization overhead of needed insertelement instructions
    // and phi nodes.
     if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
-      ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
-                                             false, TTI);
+      ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
+                                                 true, false);
       ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
     }
@@ -6729,8 +6711,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
         if (canBeScalarized(J))
           Worklist.push_back(J);
         else if (needsExtract(J))
-          ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
-                                                 false, true, TTI);
+          ScalarCost += TTI.getScalarizationOverhead(
+              ToVectorTy(J->getType(),VF), false, true);
       }
     // Scale the total scalar cost by block probability.