[X86][XOP] Added VPERMIL2PD/VPERMIL2PS raw mask decoding for target shuffle combines
llvm-svn: 271834
This commit is contained in:
parent
312071943c
commit
64c6de4525
|
@ -512,6 +512,46 @@ void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
|
|||
}
|
||||
}
|
||||
|
||||
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
///
/// \param VT          Vector type being shuffled; must be 128 or 256 bits wide
///                    with 32-bit or 64-bit elements.
/// \param M2Z         The instruction's 2-bit zeroing control immediate.
/// \param RawMask     One raw selector value per result element.
/// \param ShuffleMask Receives one entry per selector: either an element index
///                    (first source is [0, NumElts), second source is
///                    [NumElts, 2 * NumElts)) or SM_SentinelZero.
void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = VT.getSizeInBits();
  unsigned EltSize = VT.getScalarSizeInBits();
  // Validate before dividing so a bad type trips the assert rather than a
  // divide-by-zero on NumLanes.
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");

  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    // Unsigned so the comparison against (M2Z & 0x1) is not mixed-sign.
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]     MatchBit
    //   0Xb           X        Source selected by Selector index.
    //   10b           0        Source selected by Selector index.
    //   10b           1        Zero.
    //   11b           0        Zero.
    //   11b           1        Source selected by Selector index.
    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    // Start from the first element of the 128-bit lane that holds element i,
    // then add the in-lane element index taken from the selector.
    unsigned Index = i & ~(NumEltsPerLane - 1);
    if (EltSize == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    // Selector bit 2 picks the source operand. Elements of the second source
    // live at [NumElts, 2 * NumElts) in a two-input shuffle mask, so the
    // offset is a whole vector of elements, not a single element.
    unsigned Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(static_cast<int>(Index));
  }
}
|
||||
|
||||
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask) {
|
||||
for (int i = 0, e = RawMask.size(); i < e; ++i) {
|
||||
|
|
|
@ -138,6 +138,10 @@ void DecodeINSERTQIMask(int Len, int Idx,
|
|||
void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
/// \p M2Z is the zeroing control: when its bit 1 is set, a result element is
/// decoded as SM_SentinelZero if bit 3 of its selector (the match bit) differs
/// from bit 0 of \p M2Z. Otherwise the low selector bits pick the element
/// (bits [2:1] for 64-bit elements, bits [2:0] for 32-bit elements).
/// One entry is appended to \p ShuffleMask per entry in \p RawMask.
|
||||
void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
|
||||
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
|
|
@ -4937,6 +4937,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
SDValue CtrlNode = N->getOperand(3);
|
||||
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
|
||||
unsigned CtrlImm = CtrlOp->getZExtValue();
|
||||
SmallVector<uint64_t, 32> RawMask;
|
||||
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
|
||||
DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
|
||||
break;
|
||||
}
|
||||
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
|
||||
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
|
||||
break;
|
||||
|
|
|
@ -13,10 +13,7 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
|
|||
define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2pd_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl $2, %eax
|
||||
; CHECK-NEXT: vmovq %rax, %xmm2
|
||||
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x i64> <i64 2, i64 0>, i8 0)
|
||||
%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x i64> <i64 2, i64 0>, i8 0)
|
||||
|
|
Loading…
Reference in New Issue