[X86][AVX] Improved VPERMILPS variable shuffle mask decoding.
Added support for decoding VPERMILPS variable shuffle masks that aren't in the constant pool. Added target shuffle mask decoding for SCALAR_TO_VECTOR+VZEXT_MOVL cases — these can happen for v2i64 constant re-materialization. Follow-up to D17681. llvm-svn: 262784
This commit is contained in:
parent
1feae0d0bb
commit
40e1a71cdd
|
@ -454,6 +454,24 @@ void DecodeINSERTQIMask(int Len, int Idx,
|
|||
ShuffleMask.push_back(SM_SentinelUndef);
|
||||
}
|
||||
|
||||
/// Decode a VPERMILPS/VPERMILPD variable shuffle mask from a raw array of
/// constants. Each raw mask element selects a source element from within its
/// own 128-bit lane; the decoded indices pushed onto \p ShuffleMask are
/// absolute element indices into the whole vector.
void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned SizeInBits = VT.getSizeInBits();
  unsigned ScalarBits = VT.getScalarSizeInBits();
  assert((SizeInBits == 128 || SizeInBits == 256 || SizeInBits == 512) &&
         "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");

  unsigned LaneCount = SizeInBits / 128;
  unsigned EltsPerLane = VT.getVectorNumElements() / LaneCount;

  for (unsigned Idx = 0, End = RawMask.size(); Idx != End; ++Idx) {
    uint64_t Sel = RawMask[Idx];
    // VPERMILPD uses bit 1 of each mask element as the selector (1 element
    // choice per 64-bit half of a lane); VPERMILPS uses the low two bits.
    if (ScalarBits == 64)
      Sel = (Sel >> 1) & 0x1;
    else
      Sel &= 0x3;
    // Rebase the per-lane selector onto the 128-bit lane containing Idx.
    // EltsPerLane is always a power of two here (2 or 4), so this matches
    // the bit-masked form i & ~(EltsPerLane - 1).
    unsigned LaneBase = (Idx / EltsPerLane) * EltsPerLane;
    ShuffleMask.push_back(static_cast<int>(LaneBase + Sel));
  }
}
|
||||
|
||||
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask) {
|
||||
for (int i = 0, e = RawMask.size(); i < e; ++i) {
|
||||
|
|
|
@ -115,6 +115,11 @@ void DecodeEXTRQIMask(int Len, int Idx,
|
|||
void DecodeINSERTQIMask(int Len, int Idx,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// \brief Decode a VPERMILPD/VPERMILPS variable mask from a raw
|
||||
/// array of constants.
|
||||
void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
|
||||
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
|
|
@ -4880,6 +4880,20 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
|
||||
MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
|
||||
return false;
|
||||
SDValue MaskElement = MaskNode.getOperand(0).getOperand(0);
|
||||
if (auto *CN = dyn_cast<ConstantSDNode>(MaskElement)) {
|
||||
APInt RawElt = CN->getAPIntValue().getLoBits(MaskEltSizeInBits);
|
||||
RawMask.push_back(RawElt.getZExtValue());
|
||||
RawMask.append(VT.getVectorNumElements() - 1, 0);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
|
||||
return false;
|
||||
|
||||
|
@ -5012,8 +5026,13 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
case X86ISD::VPERMILPV: {
|
||||
IsUnary = true;
|
||||
SDValue MaskNode = N->getOperand(1);
|
||||
unsigned MaskEltSize = VT.getScalarSizeInBits();
|
||||
SmallVector<uint64_t, 32> RawMask;
|
||||
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
|
||||
DecodeVPERMILPMask(VT, RawMask, Mask);
|
||||
break;
|
||||
}
|
||||
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
|
||||
unsigned MaskEltSize = VT.getScalarSizeInBits();
|
||||
DecodeVPERMILPMask(C, MaskEltSize, Mask);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -40,10 +40,6 @@ define <8 x float> @combine_vpermilvar_8f32(<8 x float> %a0) {
|
|||
define <2 x double> @combine_vpermilvar_2f64(<2 x double> %a0) {
|
||||
; ALL-LABEL: combine_vpermilvar_2f64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: movl $2, %eax
|
||||
; ALL-NEXT: vmovq %rax, %xmm1
|
||||
; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
|
||||
; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
|
||||
%2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> <i64 2, i64 0>)
|
||||
|
|
Loading…
Reference in New Issue