[X86][AVX] Improved VPERMILPS variable shuffle mask decoding.

Added support for decoding VPERMILPS variable shuffle masks that aren't in the constant pool.

Added target shuffle mask decoding for SCALAR_TO_VECTOR+VZEXT_MOVL cases; these can occur during v2i64 constant rematerialization.

Follow-up to D17681.

llvm-svn: 262784
Simon Pilgrim, 2016-03-05 22:53:31 +00:00
commit 40e1a71cdd (parent 1feae0d0bb)
4 changed files with 43 additions and 5 deletions
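For context, a VPERMILPS variable shuffle selects floats within each 128-bit lane: the low two bits of each 32-bit control element choose a source element from the same lane (VPERMILPD instead uses bit 1 of each 64-bit control element). The following is a minimal standalone C++ model of that lane-relative selection; it is illustration only, not code from this patch.

#include <array>
#include <cstdint>

// Standalone model of VPERMILPS with a variable mask on a 256-bit vector
// (NOT LLVM code, just the instruction semantics). Each 32-bit control
// element picks one of the four floats within its own 128-bit lane.
std::array<float, 8> vpermilps(const std::array<float, 8> &Src,
                               const std::array<uint32_t, 8> &Ctrl) {
  std::array<float, 8> Dst{};
  for (unsigned i = 0; i != 8; ++i) {
    unsigned LaneBase = i & ~3u;            // start of this element's lane: 0 or 4
    Dst[i] = Src[LaneBase + (Ctrl[i] & 3)]; // only the low 2 bits are used
  }
  return Dst;
}

For example, a control vector of {3,2,1,0, 3,2,1,0} reverses the elements of each lane independently; no element ever crosses the 128-bit boundary.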

lib/Target/X86/Utils/X86ShuffleDecode.cpp

@@ -454,6 +454,24 @@ void DecodeINSERTQIMask(int Len, int Idx,
     ShuffleMask.push_back(SM_SentinelUndef);
 }
 
+void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
+                        SmallVectorImpl<int> &ShuffleMask) {
+  unsigned VecSize = VT.getSizeInBits();
+  unsigned EltSize = VT.getScalarSizeInBits();
+  unsigned NumLanes = VecSize / 128;
+  unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
+  assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
+         "Unexpected vector size");
+  assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
+
+  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
+    unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
+    ShuffleMask.push_back((int)(LaneOffset + M));
+  }
+}
+
 void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
                       SmallVectorImpl<int> &ShuffleMask) {
   for (int i = 0, e = RawMask.size(); i < e; ++i) {
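A hypothetical usage sketch of the decoder added above (the caller, include paths, and surrounding setup are assumptions, not part of the patch). Note how indices in the upper lane pick up the lane offset of 4, since the raw mask values are lane-relative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "X86ShuffleDecode.h" // declares DecodeVPERMILPMask (added above)

using namespace llvm;

// Hypothetical caller, for illustration only.
void decodeExample() {
  SmallVector<uint64_t, 8> RawMask = {3, 2, 1, 0, 3, 2, 1, 0};
  SmallVector<int, 8> Mask;
  DecodeVPERMILPMask(MVT::v8f32, RawMask, Mask);
  // Mask == {3, 2, 1, 0, 7, 6, 5, 4}: the second half gains the
  // lane offset 4, because indices never cross a 128-bit lane.
}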

lib/Target/X86/Utils/X86ShuffleDecode.h

@@ -115,6 +115,11 @@ void DecodeEXTRQIMask(int Len, int Idx,
 void DecodeINSERTQIMask(int Len, int Idx,
                         SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decode a VPERMILPD/VPERMILPS variable mask from a raw
+/// array of constants.
+void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
+                        SmallVectorImpl<int> &ShuffleMask);
+
 /// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
 void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
                       SmallVectorImpl<int> &ShuffleMask);

lib/Target/X86/X86ISelLowering.cpp

@@ -4880,6 +4880,20 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
     return false;
   }
 
+  if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
+      MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
+      return false;
+    SDValue MaskElement = MaskNode.getOperand(0).getOperand(0);
+    if (auto *CN = dyn_cast<ConstantSDNode>(MaskElement)) {
+      APInt RawElt = CN->getAPIntValue().getLoBits(MaskEltSizeInBits);
+      RawMask.push_back(RawElt.getZExtValue());
+      RawMask.append(VT.getVectorNumElements() - 1, 0);
+      return true;
+    }
+    return false;
+  }
+
   if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
@@ -5012,8 +5026,13 @@
   case X86ISD::VPERMILPV: {
     IsUnary = true;
     SDValue MaskNode = N->getOperand(1);
+    unsigned MaskEltSize = VT.getScalarSizeInBits();
+    SmallVector<uint64_t, 32> RawMask;
+    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
+      DecodeVPERMILPMask(VT, RawMask, Mask);
+      break;
+    }
     if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
-      unsigned MaskEltSize = VT.getScalarSizeInBits();
       DecodeVPERMILPMask(C, MaskEltSize, Mask);
       break;
     }
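The first hunk handles the pattern named in the commit message: a mask constant such as <2 x i64> <i64 2, i64 0> gets rematerialized as movl $2, %eax + vmovq %rax, %xmm1 (see the test below), which the DAG represents as VZEXT_MOVL(SCALAR_TO_VECTOR(2)) rather than a BUILD_VECTOR. A standalone sketch of the raw-mask recovery (not LLVM code; the function name and shapes are made up for illustration):

#include <cstdint>
#include <vector>

// Standalone sketch: model the raw-mask recovery for
// VZEXT_MOVL(SCALAR_TO_VECTOR(C)). Element 0 is the rematerialized
// scalar constant; VZEXT_MOVL guarantees every other element is zero.
std::vector<uint64_t> rawMaskFromVZextMovl(uint64_t ScalarC, unsigned NumElts) {
  std::vector<uint64_t> RawMask;
  RawMask.push_back(ScalarC);                    // element 0: the scalar
  RawMask.insert(RawMask.end(), NumElts - 1, 0); // zeroed upper elements
  return RawMask;
}
// rawMaskFromVZextMovl(2, 2) == {2, 0}, matching <2 x i64> <i64 2, i64 0>.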

test/CodeGen/X86/vector-shuffle-combining-avx.ll

@@ -40,10 +40,6 @@ define <8 x float> @combine_vpermilvar_8f32(<8 x float> %a0) {
 define <2 x double> @combine_vpermilvar_2f64(<2 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_2f64:
 ; ALL:       # BB#0:
-; ALL-NEXT:    movl $2, %eax
-; ALL-NEXT:    vmovq %rax, %xmm1
-; ALL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
-; ALL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
 ; ALL-NEXT:    retq
   %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
   %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> <i64 2, i64 0>)
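The updated codegen for this test is just retq, which follows directly from the new decoding: VPERMILPD reads bit 1 of each 64-bit control element, so <i64 2, i64 0> is the element permutation {1, 0}, and a swap applied twice is the identity, letting the combiner drop the rematerialized mask and both vpermilpd instructions. A quick standalone check of that composition:

#include <array>
#include <cassert>

int main() {
  // VPERMILPD uses bit 1 of each 64-bit control element: <2, 0> -> {1, 0}.
  const std::array<int, 2> Perm = {(2 >> 1) & 1, (0 >> 1) & 1}; // {1, 0}
  const std::array<double, 2> A = {1.0, 2.0};
  std::array<double, 2> Once = {A[Perm[0]], A[Perm[1]]};        // swapped
  std::array<double, 2> Twice = {Once[Perm[0]], Once[Perm[1]]}; // swapped back
  assert(Twice == A); // swap composed with swap is the identity permutation
  return 0;
}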