[X86][SSE] detectAVGPattern - Match zext(or(x,y)) 'add like' patterns (PR41316)

Fixes PR41316 where the expanded PAVG intrinsic had had one of its ADDs turned into an OR due to its operands having no conflicting bits.

llvm-svn: 357351
This commit is contained in:
Simon Pilgrim 2019-03-30 17:12:29 +00:00
parent b5498cbf64
commit 10c9032c02
2 changed files with 28 additions and 69 deletions

View File

@ -38203,10 +38203,19 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
AVGBuilder);
}
// Matches 'add like' patterns.
// TODO: Extend this to include or/zext cases.
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
// Match the or case only if its 'add-like' - can be replaced by an add.
auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
if (ISD::ADD != V.getOpcode())
if (ISD::ADD == V.getOpcode()) {
Op0 = V.getOperand(0);
Op1 = V.getOperand(1);
return true;
}
if (ISD::ZERO_EXTEND != V.getOpcode())
return false;
V = V.getOperand(0);
if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
!DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
return false;
Op0 = V.getOperand(0);
Op1 = V.getOperand(1);
@ -38222,7 +38231,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
Operands[1] = Op1;
// Now we have three operands of two additions. Check that one of them is a
// constant vector with ones, and the other two are promoted from i8/i16.
// constant vector with ones, and the other two can be promoted from i8/i16.
for (int i = 0; i < 3; ++i) {
if (!IsConstVectorInRange(Operands[i], 1, 1))
continue;
@ -38230,14 +38239,16 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
if (Operands[j].getValueType() != VT) {
if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
Operands[j].getOperand(0).getValueType() != VT)
return SDValue();
Operands[j] = Operands[j].getOperand(0);
}
// The pattern is detected, emit X86ISD::AVG instruction(s).
return SplitOpsAndApply(DAG, Subtarget, DL, VT,
{ Operands[0].getOperand(0),
Operands[1].getOperand(0) }, AVGBuilder);
return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
AVGBuilder);
}
return SDValue();

View File

@ -2479,67 +2479,15 @@ define <2 x i64> @PR41316(<2 x i64>, <2 x i64>) {
; SSE2: # %bb.0:
; SSE2-NEXT: psllw $2, %xmm0
; SSE2-NEXT: psllw $2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT: paddd %xmm3, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pslld $15, %xmm4
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: pslld $15, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm4, %xmm0
; SSE2-NEXT: pavgw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX1-LABEL: PR41316:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR41316:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $2, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: PR41316:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0
; AVX512-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: PR41316:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpavgw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%3 = bitcast <2 x i64> %0 to <8 x i16>
%4 = shl <8 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%5 = bitcast <2 x i64> %1 to <8 x i16>