[X86] Add DAG combine to remove (and X, 1) from in front of a v1i1 scalar to vector.

These can be created by type legalization promoting the inputs to select to match scalar boolean contents.

We were trying to pattern match them away during isel, but its better to just remove them from the DAG.

I've cleaned up some patterns to not check for this 'and' anymore. But I suspect this has also opened up opportunities for pattern removal.

llvm-svn: 325949
This commit is contained in:
Craig Topper 2018-02-23 20:13:42 +00:00
parent 39049c05a9
commit 61d6ddbf0a
3 changed files with 26 additions and 8 deletions

View File

@ -1653,6 +1653,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
@ -38042,11 +38043,30 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
// If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
// This occurs frequently in our masked scalar intrinsic code and our
// floating point select lowering with AVX512.
// TODO: SimplifyDemandedBits instead?
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->getAPIntValue().isOneValue())
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
Src.getOperand(0));
return SDValue();
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::SCALAR_TO_VECTOR:
return combineScalarToVector(N, DAG);
case ISD::EXTRACT_VECTOR_ELT:
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:

View File

@ -3870,7 +3870,7 @@ multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
(_.EltVT _.FRC:$src1),
(_.EltVT _.FRC:$src2))))))),
(!cast<Instruction>(InstrStr#rrk)
@ -3881,7 +3881,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
(_.EltVT _.FRC:$src1),
(_.EltVT ZeroFP))))))),
(!cast<Instruction>(InstrStr#rrkz)
@ -3993,7 +3993,7 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
(f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS
(VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
@ -4007,7 +4007,7 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
(f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS
(VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),

View File

@ -1117,9 +1117,8 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c,
;
; AVX512-LABEL: add_ss_mask:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%1 = extractelement <4 x float> %a, i64 0
@ -1172,9 +1171,8 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
;
; AVX512-LABEL: add_sd_mask:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%1 = extractelement <2 x double> %a, i64 0