[x86] eliminate unnecessary broadcast of horizontal op

This is another pattern that comes up if we more aggressively
scalarize FP ops.

llvm-svn: 357703
Sanjay Patel 2019-04-04 14:46:13 +00:00
parent b920a7f65b
commit 17648b848e
2 changed files with 14 additions and 5 deletions
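
For context, this is the shape of IR produced by scalarizing an FP op and then splatting the scalar result; a minimal sketch based on the test diff below (everything after the two extractelement lines visible in the test is an assumed completion, not copied from the file):

define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) {
  ; add the two lanes as scalars, then splat the scalar sum to both lanes
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %sum = fadd double %a0, %a1
  %ins = insertelement <2 x double> undef, double %sum, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}

With fast horizontal ops enabled, this selects to hadd X, X followed by a broadcast of element 0. Both lanes of hadd X, X are already equal, so the broadcast is redundant; that is the case the new code folds.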


@@ -32790,10 +32790,19 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 /// Eliminate a redundant shuffle of a horizontal math op.
 static SDValue foldShuffleOfHorizOp(SDNode *N) {
   unsigned Opcode = N->getOpcode();
-  if (Opcode != X86ISD::MOVDDUP)
+  if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
     if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
       return SDValue();
 
+  // For a broadcast, peek through an extract element of index 0 to find the
+  // horizontal op: broadcast (ext_vec_elt HOp, 0)
+  if (Opcode == X86ISD::VBROADCAST) {
+    SDValue SrcOp = N->getOperand(0);
+    if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+      N = SrcOp.getNode();
+  }
+
   SDValue HOp = N->getOperand(0);
   if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
       HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -32808,10 +32817,11 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
     return SDValue();
 
   // When the operands of a horizontal math op are identical, the low half of
-  // the result is the same as the high half. If the shuffle is also replicating
-  // low and high halves, we don't need the shuffle.
-  if (Opcode == X86ISD::MOVDDUP) {
+  // the result is the same as the high half. If a target shuffle is also
+  // replicating low and high halves, we don't need the shuffle.
+  if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
     // movddup (hadd X, X) --> hadd X, X
+    // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
     assert((HOp.getValueType() == MVT::v2f64 ||
             HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
     return HOp;


@@ -349,7 +349,6 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) {
 ; AVX2_FAST-LABEL: hadd_v2f64_scalar_splat:
 ; AVX2_FAST:       # %bb.0:
 ; AVX2_FAST-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2_FAST-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX2_FAST-NEXT:    retq
   %a0 = extractelement <2 x double> %a, i32 0
   %a1 = extractelement <2 x double> %a, i32 1