AVX2 optimization.

Added generation of the VPSHUFB instruction for <32 x i8> vector shuffles when possible.

llvm-svn: 163312
This commit is contained in:
Elena Demikhovsky 2012-09-06 12:42:01 +00:00
parent 3ecf916c33
commit 42777877c2
2 changed files with 51 additions and 0 deletions

View File

@ -6011,6 +6011,40 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
} }
/// LowerVECTOR_SHUFFLEv32i8 - Attempt to lower a v32i8 shuffle whose second
/// operand is undef to a single X86ISD::PSHUFB node.
///
/// Returns an empty SDValue when the result type is not v32i8, the subtarget
/// lacks AVX2, the second operand is not undef, or any in-range mask element
/// would move a byte between the lower and upper 16-byte halves of the vector.
static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
                                 SelectionDAG &DAG,
                                 const X86TargetLowering &TLI) {
  EVT VT = SVOp->getValueType(0);
  if (VT != MVT::v32i8)
    return SDValue();
  if (!TLI.getSubtarget()->hasAVX2())
    return SDValue();
  if (SVOp->getOperand(1).getOpcode() != ISD::UNDEF)
    return SDValue();

  DebugLoc dl = SVOp->getDebugLoc();
  ArrayRef<int> ShuffleMask = SVOp->getMask();
  SmallVector<SDValue,32> ByteSelectors;

  for (unsigned Slot = 0; Slot != 32; ++Slot) {
    int Selector = ShuffleMask[Slot];
    if (Selector >= 0 && Selector < 32) {
      // Source and destination bytes must sit in the same 16-byte half;
      // a cross-lane move cannot be expressed, so give up on this lowering.
      const bool SrcInUpperHalf = Selector >= 16;
      const bool DstInUpperHalf = Slot >= 16;
      if (SrcInUpperHalf != DstInUpperHalf)
        return SDValue();
      // Keep only the position within the 16-byte half.
      Selector &= 0xf;
    } else {
      // Negative (undef) or out-of-range element: emit a selector byte with
      // only the top bit set.
      Selector = 0x80;
    }
    ByteSelectors.push_back(DAG.getConstant(Selector, MVT::i8));
  }

  SDValue ControlVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8,
                                   &ByteSelectors[0], 32);
  return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, SVOp->getOperand(0),
                     ControlVec);
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in /// done when every pair / quad of shuffle mask elements point to elements in
@ -6837,6 +6871,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp; return NewOp;
} }
if (VT == MVT::v32i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this);
if (NewOp.getNode())
return NewOp;
}
// Handle all 128-bit wide vectors with 4 elements, and match them with // Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types. // several different shuffle types.
if (NumElems == 4 && VT.is128BitVector()) if (NumElems == 4 && VT.is128BitVector())

View File

@ -26,3 +26,14 @@ entry:
%shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15> %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle.i ret <16 x i16> %shuffle.i
} }
; Verify that a <32 x i8> shuffle with an undef second operand, whose mask keeps
; every defined element inside its own 16-byte half, is emitted as a single
; vpshufb taking its control vector as a memory operand.
; CHECK: vpshufb_test
; CHECK: vpshufb {{.*\(%r.*}}, %ymm
; CHECK: ret
define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
  %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
                                                                i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
                                                                i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
  ret <32 x i8>%S
}