[X86][XOP] Added VPERMIL2PD/VPERMIL2PS raw mask decoding for target shuffle combines

llvm-svn: 271834
This commit is contained in:
Simon Pilgrim 2016-06-05 15:21:30 +00:00
parent 312071943c
commit 64c6de4525
4 changed files with 50 additions and 4 deletions

View File

@ -512,6 +512,46 @@ void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
}
}
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
///
/// \param VT          Vector type being shuffled; must be 128 or 256 bits wide
///                    with 32-bit or 64-bit elements.
/// \param M2Z         Two-bit zeroing control immediate; together with each
///                    selector's match bit it decides whether the element is
///                    zeroed.
/// \param RawMask     One raw selector value per result element.
/// \param ShuffleMask Receives one entry per element of \p RawMask: either
///                    SM_SentinelZero, or a two-operand shuffle index where
///                    [0, NumElts) addresses the first source and
///                    [NumElts, 2*NumElts) addresses the second source.
void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = VT.getSizeInBits();
  unsigned EltSize = VT.getScalarSizeInBits();

  // Validate the type before deriving the lane geometry: a vector narrower
  // than 128 bits would make NumLanes zero and the division below trap.
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");

  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]     MatchBit
    //   0Xb           X        Source selected by Selector index.
    //   10b           0        Source selected by Selector index.
    //   10b           1        Zero.
    //   11b           0        Zero.
    //   11b           1        Source selected by Selector index.
    if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    // Start from the first element of the 128-bit lane containing element i,
    // then add the in-lane element chosen by the selector.
    unsigned Index = i & ~(NumEltsPerLane - 1);
    if (EltSize == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    // Selector bit 2 picks the source operand. In a two-operand shuffle mask
    // the second source's elements are numbered starting at NumElts, so the
    // source bit must be scaled by NumElts rather than added as a raw 0/1.
    unsigned Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(static_cast<int>(Index));
  }
}
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {

View File

@ -138,6 +138,10 @@ void DecodeINSERTQIMask(int Len, int Idx,
void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
/// \p VT is the shuffled vector type (128 or 256 bits wide, with 32-bit or
/// 64-bit elements). \p M2Z is the zeroing control immediate: when its bit 1
/// is set, an element whose selector match bit (bit 3) differs from bit 0 of
/// \p M2Z is decoded as SM_SentinelZero. \p RawMask holds one selector per
/// element, and exactly one entry per selector is appended to \p ShuffleMask.
void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
/// \p RawMask supplies the raw constant mask values, which the decoder walks
/// in order. \p ShuffleMask is the output container (presumably appended to,
/// like the other raw-mask decoders declared here; confirm against the
/// definition).
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);

View File

@ -4937,6 +4937,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
break;
}
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
break;

View File

@ -13,10 +13,7 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: combine_vpermil2pd_identity:
; CHECK: # BB#0:
; CHECK-NEXT: movl $2, %eax
; CHECK-NEXT: vmovq %rax, %xmm2
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x i64> <i64 2, i64 0>, i8 0)
%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x i64> <i64 2, i64 0>, i8 0)