[X86] Add DAG combine to remove (and X, 1) from in front of a v1i1 scalar to vector.
These can be created by type legalization promoting the inputs to select to match scalar boolean contents. We were trying to pattern match them away during isel, but it's better to just remove them from the DAG. I've cleaned up some patterns to not check for this 'and' anymore. But I suspect this has also opened up opportunities for pattern removal. llvm-svn: 325949
This commit is contained in:
parent
39049c05a9
commit
61d6ddbf0a
|
@ -1653,6 +1653,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
|
||||
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
|
||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
||||
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
|
||||
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
|
||||
|
@ -38042,11 +38043,30 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Target DAG combine for ISD::SCALAR_TO_VECTOR nodes.
///
/// Strips a redundant (and X, 1) feeding a v1i1 scalar_to_vector. Type
/// legalization inserts these masks when promoting select inputs to match
/// scalar boolean contents, and they show up frequently in the masked
/// scalar intrinsic code and the AVX512 floating point select lowering.
/// Returns the simplified node, or an empty SDValue if no combine applies.
/// TODO: SimplifyDemandedBits instead?
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
  EVT ResVT = N->getValueType(0);
  SDValue Input = N->getOperand(0);

  // Only interested in (scalar_to_vector (and X, C)) producing v1i1, where
  // the AND has no other users we would be bypassing.
  if (ResVT != MVT::v1i1 || Input.getOpcode() != ISD::AND ||
      !Input.hasOneUse())
    return SDValue();

  // The mask must be the constant 1; a v1i1 result only consumes bit 0, so
  // the AND is a no-op and can be dropped.
  auto *Mask = dyn_cast<ConstantSDNode>(Input.getOperand(1));
  if (!Mask || !Mask->getAPIntValue().isOneValue())
    return SDValue();

  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
                     Input.getOperand(0));
}
|
||||
|
||||
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
return combineScalarToVector(N, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
case X86ISD::PEXTRW:
|
||||
case X86ISD::PEXTRB:
|
||||
|
|
|
@ -3870,7 +3870,7 @@ multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
|
|||
|
||||
def : Pat<(_.VT (OpNode _.RC:$src0,
|
||||
(_.VT (scalar_to_vector
|
||||
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
|
||||
(_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
|
||||
(_.EltVT _.FRC:$src1),
|
||||
(_.EltVT _.FRC:$src2))))))),
|
||||
(!cast<Instruction>(InstrStr#rrk)
|
||||
|
@ -3881,7 +3881,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
|
|||
|
||||
def : Pat<(_.VT (OpNode _.RC:$src0,
|
||||
(_.VT (scalar_to_vector
|
||||
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
|
||||
(_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
|
||||
(_.EltVT _.FRC:$src1),
|
||||
(_.EltVT ZeroFP))))))),
|
||||
(!cast<Instruction>(InstrStr#rrkz)
|
||||
|
@ -3993,7 +3993,7 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
|
|||
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
|
||||
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
|
||||
|
||||
def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
|
||||
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
|
||||
(f32 FR32X:$src1), (f32 FR32X:$src2))),
|
||||
(COPY_TO_REGCLASS
|
||||
(VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
|
||||
|
@ -4007,7 +4007,7 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
|
|||
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
|
||||
(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
|
||||
|
||||
def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
|
||||
def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
|
||||
(f64 FR64X:$src1), (f64 FR64X:$src2))),
|
||||
(COPY_TO_REGCLASS
|
||||
(VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
|
||||
|
|
|
@ -1117,9 +1117,8 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c,
|
|||
;
|
||||
; AVX512-LABEL: add_ss_mask:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = extractelement <4 x float> %a, i64 0
|
||||
|
@ -1172,9 +1171,8 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
|
|||
;
|
||||
; AVX512-LABEL: add_sd_mask:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = extractelement <2 x double> %a, i64 0
|
||||
|
|
Loading…
Reference in New Issue