[X86] combineExtractWithShuffle - handle extract(truncate(x), 0)
Eventually we need to generalize combineExtractWithShuffle to handle all faux shuffles, and handle truncate (and X86ISD::VTRUNC etc.) there, but we're not ready for that yet (the code still creates nodes on the fly, has incomplete DemandedElts support, and makes bad use of the recursive Depth limit).
llvm-svn: 369134
This commit is contained in:
parent
2632c677f8
commit
8ff1b7de4d
|
@ -35202,6 +35202,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
||||||
if (DCI.isBeforeLegalizeOps())
|
if (DCI.isBeforeLegalizeOps())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
SDLoc dl(N);
|
||||||
SDValue Src = N->getOperand(0);
|
SDValue Src = N->getOperand(0);
|
||||||
SDValue Idx = N->getOperand(1);
|
SDValue Idx = N->getOperand(1);
|
||||||
|
|
||||||
|
@ -35223,6 +35224,16 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
||||||
return DAG.getBitcast(VT, SrcOp);
|
return DAG.getBitcast(VT, SrcOp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle extract(truncate(x)) for 0'th index.
|
||||||
|
// TODO: Treat this as a faux shuffle?
|
||||||
|
// TODO: When can we use this for general indices?
|
||||||
|
if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() &&
|
||||||
|
isNullConstant(Idx)) {
|
||||||
|
Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
|
||||||
|
Src = DAG.getBitcast(SrcVT, Src);
|
||||||
|
return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx);
|
||||||
|
}
|
||||||
|
|
||||||
// Resolve the target shuffle inputs and mask.
|
// Resolve the target shuffle inputs and mask.
|
||||||
SmallVector<int, 16> Mask;
|
SmallVector<int, 16> Mask;
|
||||||
SmallVector<SDValue, 2> Ops;
|
SmallVector<SDValue, 2> Ops;
|
||||||
|
@ -35260,7 +35271,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
int SrcIdx = Mask[N->getConstantOperandVal(1)];
|
int SrcIdx = Mask[N->getConstantOperandVal(1)];
|
||||||
SDLoc dl(N);
|
|
||||||
|
|
||||||
// If the shuffle source element is undef/zero then we can just accept it.
|
// If the shuffle source element is undef/zero then we can just accept it.
|
||||||
if (SrcIdx == SM_SentinelUndef)
|
if (SrcIdx == SM_SentinelUndef)
|
||||||
|
|
|
@ -1828,9 +1828,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||||
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1
|
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1
|
||||||
; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
|
; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||||
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
|
||||||
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
|
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512BW-NEXT: vzeroupper
|
; AVX512BW-NEXT: vzeroupper
|
||||||
|
@ -1852,9 +1850,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||||
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
|
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1
|
||||||
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
|
; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
|
||||||
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
|
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
|
@ -1879,10 +1875,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||||
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||||
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
|
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
|
||||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
|
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
|
||||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
|
||||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512DQ-NEXT: vzeroupper
|
; AVX512DQ-NEXT: vzeroupper
|
||||||
|
@ -1907,10 +1900,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
|
||||||
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||||
; AVX512DQVL-NEXT: vpmovdb %zmm1, %xmm1
|
; AVX512DQVL-NEXT: vpmovdb %zmm1, %xmm1
|
||||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
|
; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||||
; AVX512DQVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
|
||||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
|
||||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX512DQVL-NEXT: vzeroupper
|
; AVX512DQVL-NEXT: vzeroupper
|
||||||
|
|
Loading…
Reference in New Issue