diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6c62e661b5a7..c085bc557eed 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38203,10 +38203,19 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, AVGBuilder); } - // Matches 'add like' patterns. - // TODO: Extend this to include or/zext cases. + // Matches 'add like' patterns: add(Op0,Op1) and zext(or(Op0,Op1)). + // Match the or case only if it's 'add-like' - can be replaced by an add. auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) { - if (ISD::ADD != V.getOpcode()) + if (ISD::ADD == V.getOpcode()) { + Op0 = V.getOperand(0); + Op1 = V.getOperand(1); + return true; + } + if (ISD::ZERO_EXTEND != V.getOpcode()) + return false; + V = V.getOperand(0); + if (V.getValueType() != VT || ISD::OR != V.getOpcode() || + !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1))) return false; Op0 = V.getOperand(0); Op1 = V.getOperand(1); @@ -38222,7 +38231,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, Operands[1] = Op1; // Now we have three operands of two additions. Check that one of them is a - // constant vector with ones, and the other two are promoted from i8/i16. + // constant vector with ones, and the other two can be promoted from i8/i16. for (int i = 0; i < 3; ++i) { if (!IsConstVectorInRange(Operands[i], 1, 1)) continue; @@ -38230,14 +38239,16 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, // Check if Operands[0] and Operands[1] are results of type promotion. 
for (int j = 0; j < 2; ++j) - if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || - Operands[j].getOperand(0).getValueType() != VT) - return SDValue(); + if (Operands[j].getValueType() != VT) { + if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || + Operands[j].getOperand(0).getValueType() != VT) + return SDValue(); + Operands[j] = Operands[j].getOperand(0); + } // The pattern is detected, emit X86ISD::AVG instruction(s). - return SplitOpsAndApply(DAG, Subtarget, DL, VT, - { Operands[0].getOperand(0), - Operands[1].getOperand(0) }, AVGBuilder); + return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]}, + AVGBuilder); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index 15074221827e..0494b0aeda94 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -2479,67 +2479,15 @@ define <2 x i64> @PR41316(<2 x i64>, <2 x i64>) { ; SSE2: # %bb.0: ; SSE2-NEXT: psllw $2, %xmm0 ; SSE2-NEXT: psllw $2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE2-NEXT: paddd %xmm3, %xmm4 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: pslld $15, %xmm4 -; SSE2-NEXT: psrad $16, %xmm4 -; SSE2-NEXT: pslld $15, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm4, %xmm0 +; SSE2-NEXT: pavgw %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; AVX1-LABEL: PR41316: -; AVX1: # %bb.0: -; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: 
vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1 -; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: PR41316: -; AVX2: # %bb.0: -; AVX2-NEXT: vpsllw $2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $2, %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: PR41316: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0 -; AVX512-NEXT: vpsllw $2, %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512-NEXT: vzeroupper -; 
AVX512-NEXT: retq +; AVX-LABEL: PR41316: +; AVX: # %bb.0: +; AVX-NEXT: vpsllw $2, %xmm0, %xmm0 +; AVX-NEXT: vpsllw $2, %xmm1, %xmm1 +; AVX-NEXT: vpavgw %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %3 = bitcast <2 x i64> %0 to <8 x i16> %4 = shl <8 x i16> %3, %5 = bitcast <2 x i64> %1 to <8 x i16>