[X86] Add support for using 512-bit PSUBUS to combineSelect.

The code already supports 128-bit and 256-bit vectors and even knows to split 256-bit vectors for AVX1, so we really just needed to stop looking for specific VTs and subtarget features and instead look for legal VTs with i8/i16 elements.

While there, add some curly braces around outer if statement bodies that contain only another if. It makes all the closing curly braces look more regular.

llvm-svn: 340128
This commit is contained in:
Craig Topper 2018-08-18 18:51:03 +00:00
parent b40a1d5f84
commit 40c9559b74
2 changed files with 10 additions and 9 deletions

View File

@ -33029,9 +33029,10 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
// psubus is available in SSE2 for i8 and i16 vectors.
Subtarget.hasSSE2() &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@ -33062,7 +33063,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
SUBUSBuilder);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
@ -33083,7 +33084,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode())
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask()) {
@ -33093,7 +33094,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
SUBUSBuilder);
}
}
}
}
}
}

View File

@ -1186,8 +1186,7 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; AVX512-LABEL: test17:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %zmm1
; AVX512-NEXT: vpcmpnltub %zmm1, %zmm0, %k1
; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpsubusb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <64 x i8> undef, i8 %w, i32 0
@ -1236,8 +1235,7 @@ define <32 x i16> @test18(<32 x i16> %x, i16 zeroext %w) nounwind {
; AVX512-LABEL: test18:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %zmm1
; AVX512-NEXT: vpcmpnltuw %zmm1, %zmm0, %k1
; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <32 x i16> undef, i16 %w, i32 0