[CostModel][X86] Generalized cost calculation of SHL by constant -> MUL conversion.
llvm-svn: 291364
This commit is contained in:
parent
935beac173
commit
e70644dab7
|
@ -409,21 +409,9 @@ int X86TTIImpl::getArithmeticInstrCost(
|
|||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
|
||||
MVT VT = LT.second;
|
||||
// Vector shift left by non uniform constant can be lowered
|
||||
// into vector multiply (pmullw/pmulld).
|
||||
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
|
||||
(VT == MVT::v4i32 && ST->hasSSE41()))
|
||||
return LT.first;
|
||||
|
||||
// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
|
||||
// sequence of extract + two vector multiply + insert.
|
||||
if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
|
||||
(ST->hasAVX() && !ST->hasAVX2()))
|
||||
ISD = ISD::MUL;
|
||||
|
||||
// A vector shift left by non uniform constant is converted
|
||||
// into a vector multiply; the new multiply is eventually
|
||||
// lowered into a sequence of shuffles and 2 x pmuludq.
|
||||
if (VT == MVT::v4i32 && ST->hasSSE2())
|
||||
// into vector multiply.
|
||||
if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
|
||||
((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
|
||||
ISD = ISD::MUL;
|
||||
}
|
||||
|
||||
|
@ -534,6 +522,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
|||
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v8i16, 1 }, // pmullw
|
||||
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
|
||||
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
|
@ -549,13 +538,13 @@ int X86TTIImpl::getArithmeticInstrCost(
|
|||
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
|
||||
// to hide "20 cycles" for each lane.
|
||||
{ ISD::SDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
};
|
||||
|
||||
if (ST->hasSSE2())
|
||||
|
|
Loading…
Reference in New Issue