[X86][AVX] combineHorizontalPredicateResult - support v16i16/v32i8 reduction on AVX1
Use the getPMOVMSKB helper, which splits v32i8 MOVMSK calls on pre-AVX2 targets.

llvm-svn: 357608
This commit is contained in:
parent
0c27bc2e1f
commit
9e28dddf55
|
@ -34301,11 +34301,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
|
||||||
if (Match.getScalarValueSizeInBits() != BitWidth)
|
if (Match.getScalarValueSizeInBits() != BitWidth)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// We require AVX2 for PMOVMSKB for v16i16/v32i8;
|
|
||||||
unsigned MatchSizeInBits = Match.getValueSizeInBits();
|
unsigned MatchSizeInBits = Match.getValueSizeInBits();
|
||||||
if (!(MatchSizeInBits == 128 ||
|
if (!(MatchSizeInBits == 128 || (MatchSizeInBits == 256 && Subtarget.hasAVX())))
|
||||||
(MatchSizeInBits == 256 &&
|
|
||||||
((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Make sure this isn't a vector of 1 element. The perf win from using MOVMSK
|
// Make sure this isn't a vector of 1 element. The perf win from using MOVMSK
|
||||||
|
@ -34344,9 +34341,9 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
|
||||||
|
|
||||||
// The setcc produces an i8 of 0/1, so extend that to the result width and
|
// The setcc produces an i8 of 0/1, so extend that to the result width and
|
||||||
// negate to get the final 0/-1 mask value.
|
// negate to get the final 0/-1 mask value.
|
||||||
SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
|
|
||||||
SDValue Movmsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BitcastLogicOp);
|
|
||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
|
||||||
|
SDValue Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
|
||||||
EVT SetccVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
|
EVT SetccVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
|
||||||
MVT::i32);
|
MVT::i32);
|
||||||
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
|
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
|
||||||
|
|
|
@ -673,14 +673,14 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: shll $16, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: orl %eax, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: xorl %eax, %eax
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: cmpl $-1, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: sete %al
|
||||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
; AVX1-NEXT: negl %eax
|
||||||
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
@ -867,17 +867,13 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: shll $16, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: orl %eax, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: cmpl $-1, %ecx
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: sete %al
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: negb %al
|
||||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1555,21 +1551,17 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: bool_reduction_v32i8:
|
; AVX1-LABEL: bool_reduction_v32i8:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: shll $16, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: orl %eax, %ecx
|
||||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: cmpl $-1, %ecx
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: sete %al
|
||||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: negb %al
|
||||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
|
||||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
||||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
|
@ -611,14 +611,13 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %edx
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: shll $16, %edx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: xorl %eax, %eax
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: orl %ecx, %edx
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: setne %al
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: negl %eax
|
||||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
@ -791,17 +790,12 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: shll $16, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: orl %eax, %ecx
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: setne %al
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: negb %al
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1490,21 +1484,16 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: bool_reduction_v32i8:
|
; AVX1-LABEL: bool_reduction_v32i8:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovmskb %xmm2, %ecx
|
||||||
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: shll $16, %ecx
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
; AVX1-NEXT: orl %eax, %ecx
|
||||||
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: setne %al
|
||||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
|
; AVX1-NEXT: negb %al
|
||||||
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
|
|
||||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
|
||||||
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
|
|
||||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue