[SystemZ::TTI] Accurate costs for i1->double vector conversions

This factors out a new method getBoolVecToIntConversionCost() containing the code for vector sext/zext of i1, in order to reuse it for i1 to double vector conversions. Review: Ulrich Weigand https://reviews.llvm.org/D53923 llvm-svn: 345817
2018-11-01 09:01:51 +00:00 · 2018-11-01 09:01:51 +00:00 · f15a53bc81
parent 1bb9aea56b
commit f15a53bc81
3 changed files with 73 additions and 15 deletions
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@ -635,6 +635,25 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
  return nullptr;
 }

+// Get the cost of converting a boolean vector to a vector with same width
+// and element size as Dst, plus the cost of zero extending if needed.
+unsigned SystemZTTIImpl::
+getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+                              const Instruction *I) {
+  assert (Dst->isVectorTy());
+  unsigned VF = Dst->getVectorNumElements();
+  unsigned Cost = 0;
+  // If we know what the widths of the compared operands, get any cost of
+  // converting it to match Dst. Otherwise assume same widths.
+  Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+  if (CmpOpTy != nullptr)
+    Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+  if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
+    // One 'vn' per dst vector with an immediate mask.
+    Cost += getNumVectorRegs(Dst);
+  return Cost;
+}
+
 int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                     const Instruction *I) {
  unsigned DstScalarBits = Dst->getScalarSizeInBits();
@ -666,19 +685,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

        return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
      }
-      else if (SrcScalarBits == 1) {
-        // This should be extension of a compare i1 result.
-        // If we know what the widths of the compared operands, get the
-        // cost of converting it to Dst. Otherwise assume same widths.
-        unsigned Cost = 0;
-        Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
-        if (CmpOpTy != nullptr)
-          Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
-        if (Opcode == Instruction::ZExt)
-          // One 'vn' per dst vector with an immediate mask.
-          Cost += NumDstVectors;
-        return Cost;
-      }
+      else if (SrcScalarBits == 1)
+        return getBoolVecToIntConversionCost(Opcode, Dst, I);
    }

    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
@ -687,8 +695,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
      // (seems to miss on differentiating on scalar/vector types).

      // Only 64 bit vector conversions are natively supported.
-      if (SrcScalarBits == 64 && DstScalarBits == 64)
-        return NumDstVectors;
+      if (DstScalarBits == 64) {
+        if (SrcScalarBits == 64)
+          return NumDstVectors;
+
+        if (SrcScalarBits == 1)
+          return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors;
+      }

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values. Base implementation does not
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@ -80,6 +80,8 @@ public:
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
  unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+  unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+                                         const Instruction *I);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
--- a/llvm/test/Analysis/CostModel/SystemZ/cmp-tofp.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/cmp-tofp.ll
@ -0,0 +1,43 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+; Costs for conversion of i1 vectors to vectors of double.
+
+define <2 x double> @fun0(<2 x i8> %val1, <2 x i8> %val2) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %v = uitofp <2 x i1> %cmp to <2 x double>
+  ret <2 x double> %v
+
+; CHECK: fun0
+; CHECK: cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: cost of 5 for instruction:   %v = uitofp <2 x i1> %cmp to <2 x double>
+}
+
+define <2 x double> @fun1(<2 x i8> %val1, <2 x i8> %val2) {
+  %cmp = icmp eq <2 x i8> %val1, %val2
+  %v = sitofp <2 x i1> %cmp to <2 x double>
+  ret <2 x double> %v
+
+; CHECK: fun1
+; CHECK: cost of 1 for instruction:   %cmp = icmp eq <2 x i8> %val1, %val2
+; CHECK: cost of 4 for instruction:   %v = sitofp <2 x i1> %cmp to <2 x double>
+}
+
+define <2 x double> @fun2(<2 x i64> %val1, <2 x i64> %val2) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %v = uitofp <2 x i1> %cmp to <2 x double>
+  ret <2 x double> %v
+
+; CHECK: fun2
+; CHECK: cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: cost of 2 for instruction:   %v = uitofp <2 x i1> %cmp to <2 x double>
+}
+
+define <2 x double> @fun3(<2 x i64> %val1, <2 x i64> %val2) {
+  %cmp = icmp eq <2 x i64> %val1, %val2
+  %v = sitofp <2 x i1> %cmp to <2 x double>
+  ret <2 x double> %v
+
+; CHECK: fun3
+; CHECK: cost of 1 for instruction:   %cmp = icmp eq <2 x i64> %val1, %val2
+; CHECK: cost of 1 for instruction:   %v = sitofp <2 x i1> %cmp to <2 x double>
+}