[CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.

llvm-svn: 291364
This commit is contained in:
Simon Pilgrim 2017-01-07 21:33:00 +00:00
parent 935beac173
commit e70644dab7
1 changed file with 10 additions and 21 deletions

View File

@@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost(
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
MVT VT = LT.second;
// Vector shift left by non uniform constant can be lowered
// into vector multiply (pmullw/pmulld).
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
(VT == MVT::v4i32 && ST->hasSSE41()))
return LT.first;
// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
// sequence of extract + two vector multiply + insert.
if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
(ST->hasAVX() && !ST->hasAVX2()))
ISD = ISD::MUL;
// A vector shift left by non uniform constant is converted
// into a vector multiply; the new multiply is eventually
// lowered into a sequence of shuffles and 2 x pmuludq.
if (VT == MVT::v4i32 && ST->hasSSE2())
// into vector multiply.
if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
ISD = ISD::MUL;
}
@@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v8i16, 1 }, // pmullw
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
@@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost(
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
// to hide "20 cycles" for each lane.
{ ISD::SDIV, MVT::v16i8, 16*20 },
{ ISD::SDIV, MVT::v8i16, 8*20 },
{ ISD::SDIV, MVT::v4i32, 4*20 },
{ ISD::SDIV, MVT::v2i64, 2*20 },
{ ISD::SDIV, MVT::v8i16, 8*20 },
{ ISD::SDIV, MVT::v4i32, 4*20 },
{ ISD::SDIV, MVT::v2i64, 2*20 },
{ ISD::UDIV, MVT::v16i8, 16*20 },
{ ISD::UDIV, MVT::v8i16, 8*20 },
{ ISD::UDIV, MVT::v4i32, 4*20 },
{ ISD::UDIV, MVT::v2i64, 2*20 },
{ ISD::UDIV, MVT::v8i16, 8*20 },
{ ISD::UDIV, MVT::v4i32, 4*20 },
{ ISD::UDIV, MVT::v2i64, 2*20 },
};
if (ST->hasSSE2())