Revert "[AArch64] Coalesce Copy Zero during instruction selection"

This reverts commit d8f57105010cc7e78026e511d5def873fc91e0e7. Original Commit: Author: Haicheng Wu <haicheng@codeaurora.org> Date: Sun Feb 18 13:51:33 2018 +0000 [AArch64] Coalesce Copy Zero during instruction selection Add special case for copy of zero to avoid a double copy. Differential Revision: https://reviews.llvm.org/D36104 The author's intention is to remove a BB that has only one mov instruction. In order to do that, d8f571050 pessimizes MachineSinking by introducing a copy, such that the mov instruction is NOT moved to the BB. Downstream optimization then gets rid of the BB with only the mov instruction. This works well if we have only one fall-through branch, as there is only one "extra" mov instruction. If we have multiple fall-throughs, we will have a lot of redundant movs. In such a case, it's better to keep this BB, which has one mov instruction. This is causing degradation in jpeg, fft and other codebases. I believe that if we want to remove a BB with only one branch instruction, we should not pessimize MachineSinking at all, and should find some other solution. llvm-svn: 335251
2018-06-21 16:05:24 +00:00 · 2018-06-21 16:05:24 +00:00 · b60acb9e48
parent 22ee191c3e
commit b60acb9e48
5 changed files with 3 additions and 79 deletions
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@ -2892,35 +2892,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
    }
    break;
  }
-  case ISD::CopyToReg: {
+
    // Special case for copy of zero to avoid a double copy.
    SDNode *CopyVal = Node->getOperand(2).getNode();
    ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal);
    if (!CopyValConst || !CopyValConst->isNullValue())
      break;
    const SDValue &Dest = Node->getOperand(1);
    if (!TargetRegisterInfo::isVirtualRegister(
            cast<RegisterSDNode>(Dest)->getReg()))
      break;
    unsigned ZeroReg;
    EVT ZeroVT = CopyValConst->getValueType(0);
    if (ZeroVT == MVT::i32)
      ZeroReg = AArch64::WZR;
    else if (ZeroVT == MVT::i64)
      ZeroReg = AArch64::XZR;
    else
      break;
    unsigned NumOperands = Node->getNumOperands();
    SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT);
    // Replace the source operand (#0) with ZeroRegVal.
    SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1), ZeroRegVal,
                     (NumOperands == 4) ? Node->getOperand(3) : SDValue()};
    SDValue New =
        CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), Node->getVTList(),
                        makeArrayRef(Ops, NumOperands));
    ReplaceNode(Node, New.getNode());
    return;
  }
  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
--- a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@ -28,7 +28,6 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
 ; Next BB
 ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
 ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
 ; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
 entry:
  %idxprom = sext i32 %i1 to i64
--- a/llvm/test/CodeGen/AArch64/arm64-cse.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cse.ll
@ -10,7 +10,7 @@ entry:
 ; CHECK: subs
 ; CHECK-NOT: cmp
 ; CHECK-NOT: sub
-; CHECK: b.lt
+; CHECK: b.ge
 ; CHECK: sub
 ; CHECK: sub
 ; CHECK-NOT: sub
--- a/llvm/test/CodeGen/AArch64/copy-zero-reg.ll
+++ b/llvm/test/CodeGen/AArch64/copy-zero-reg.ll
@ -1,47 +0,0 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; Verify there is no tiny block having only one mov wzr instruction between for.body.lr.ph and sw.epilog.loopexit
 define void @unroll_by_2(i32 %trip_count, i32* %p) {
 ; CHECK-LABEL: unroll_by_2
 ; CHECK: // %for.body.lr.ph
 ; CHECK:     mov   w{{[0-9]+}}, wzr
 ; CHECK:     b.eq
 ; CHECK-NOT: mov   w{{[0-9]+}}, wzr
 ; CHECK: // %for.body.lr.ph.new
 ; CHECK: // %for.body
 ; CHECK: // %sw.epilog.loopexit
 ; CHECK: // %for.body.epil
 ; CHECK: // %exit
 ; CHECK-NEXT:   ret
 for.body.lr.ph:
  %xtraiter = and i32 %trip_count, 1
  %cmp = icmp eq i32 %trip_count, 1
  br i1 %cmp, label %sw.epilog.loopexit, label %for.body.lr.ph.new
 for.body.lr.ph.new:
  %unroll_iter = sub nsw i32 %trip_count, %xtraiter
  br label %for.body
 for.body:
  %indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ]
  %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ]
  %array = getelementptr inbounds i32, i32 * %p, i32 %indvars
  store  i32 %niter, i32* %array
  %indvars.next = add i32 %indvars, 2
  %niter.nsub = add i32 %niter, -2
  %niter.ncmp = icmp eq i32 %niter.nsub, 0
  br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body
 sw.epilog.loopexit:
  %indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ]
  %lcmp.mod = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod, label %exit, label %for.body.epil
 for.body.epil:
  %array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr
  store  i32 %indvars.unr, i32* %array.epil
  br label %exit
 exit:
  ret void
 }
--- a/llvm/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
@ -10,7 +10,7 @@ define void @test1() {
 ; registers that make up the i128 pair
 ; CHECK:  mov  x0, xzr
-; CHECK:  mov  x1, xzr 
+; CHECK:  mov  x1, x0
 ; CHECK:  bl  _test2
 }