Revert "[AArch64] Coalesce Copy Zero during instruction selection"

This reverts commit d8f57105010cc7e78026e511d5def873fc91e0e7.

Original Commit:

Author: Haicheng Wu <haicheng@codeaurora.org>
Date:   Sun Feb 18 13:51:33 2018 +0000

    [AArch64] Coalesce Copy Zero during instruction selection

    Add special case for copy of zero to avoid a double copy.

    Differential Revision: https://reviews.llvm.org/D36104

The author's intention was to remove a BB that has only one mov instruction.
In order to do that, d8f571050 pessimizes MachineSinking by introducing a
copy, such that the mov instruction is NOT moved to the BB. Downstream
optimization then gets rid of the BB with only the mov instruction. This
works well if we have only one fall-through branch, as there is only one
"extra" mov instruction.

If we have multiple fall-throughs, we will have a lot of redundant movs.
In such a case, it's better to keep this BB, which has one mov instruction.

This is causing degradation in jpeg, fft and other codebases. I believe
that if we want to remove a BB with only one branch instruction, we should
not pessimize MachineSinking at all, and should find some other solution.

llvm-svn: 335251
This commit is contained in:
Sirish Pande 2018-06-21 16:05:24 +00:00
parent 22ee191c3e
commit b60acb9e48
5 changed files with 3 additions and 79 deletions

View File

@ -2892,35 +2892,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} }
break; break;
} }
case ISD::CopyToReg: {
// Special case for copy of zero to avoid a double copy.
SDNode *CopyVal = Node->getOperand(2).getNode();
ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal);
if (!CopyValConst || !CopyValConst->isNullValue())
break;
const SDValue &Dest = Node->getOperand(1);
if (!TargetRegisterInfo::isVirtualRegister(
cast<RegisterSDNode>(Dest)->getReg()))
break;
unsigned ZeroReg;
EVT ZeroVT = CopyValConst->getValueType(0);
if (ZeroVT == MVT::i32)
ZeroReg = AArch64::WZR;
else if (ZeroVT == MVT::i64)
ZeroReg = AArch64::XZR;
else
break;
unsigned NumOperands = Node->getNumOperands();
SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT);
// Replace the source operand (#0) with ZeroRegVal.
SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1), ZeroRegVal,
(NumOperands == 4) ? Node->getOperand(3) : SDValue()};
SDValue New =
CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), Node->getVTList(),
makeArrayRef(Ops, NumOperands));
ReplaceNode(Node, New.getNode());
return;
}
case ISD::FrameIndex: { case ISD::FrameIndex: {
// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
int FI = cast<FrameIndexSDNode>(Node)->getIndex(); int FI = cast<FrameIndexSDNode>(Node)->getIndex();

View File

@ -28,7 +28,6 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
; Next BB ; Next BB
; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
entry: entry:
%idxprom = sext i32 %i1 to i64 %idxprom = sext i32 %i1 to i64

View File

@ -10,7 +10,7 @@ entry:
; CHECK: subs ; CHECK: subs
; CHECK-NOT: cmp ; CHECK-NOT: cmp
; CHECK-NOT: sub ; CHECK-NOT: sub
; CHECK: b.lt ; CHECK: b.ge
; CHECK: sub ; CHECK: sub
; CHECK: sub ; CHECK: sub
; CHECK-NOT: sub ; CHECK-NOT: sub

View File

@ -1,47 +0,0 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s
; Verify there is no tiny block having only one mov wzr instruction between for.body.lr.ph and sw.epilog.loopexit
; Loop processed two elements at a time: %for.body advances the index by 2 per
; iteration, and %for.body.epil handles the single leftover iteration when the
; low bit of %trip_count is set (%xtraiter = %trip_count & 1).
; The assertions below expect a "mov wN, wzr" in the entry block ahead of the
; b.eq, and no further such mov before the %for.body.lr.ph.new block.
; NOTE(review): presumably that mov materializes the zero incoming value of the
; phis below -- confirm against the generated assembly.
define void @unroll_by_2(i32 %trip_count, i32* %p) {
; CHECK-LABEL: unroll_by_2
; CHECK: // %for.body.lr.ph
; CHECK: mov w{{[0-9]+}}, wzr
; CHECK: b.eq
; CHECK-NOT: mov w{{[0-9]+}}, wzr
; CHECK: // %for.body.lr.ph.new
; CHECK: // %for.body
; CHECK: // %sw.epilog.loopexit
; CHECK: // %for.body.epil
; CHECK: // %exit
; CHECK-NEXT: ret
; Entry block: compute the remainder and skip the main loop entirely when the
; trip count is exactly 1.
for.body.lr.ph:
%xtraiter = and i32 %trip_count, 1
%cmp = icmp eq i32 %trip_count, 1
br i1 %cmp, label %sw.epilog.loopexit, label %for.body.lr.ph.new
; Main-loop preheader: iteration count with the remainder subtracted.
for.body.lr.ph.new:
%unroll_iter = sub nsw i32 %trip_count, %xtraiter
br label %for.body
; Main loop: stores the remaining-iteration counter at p[indvars], then steps
; the index up by 2 and the counter down by 2 until the counter reaches 0.
for.body:
%indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ]
%niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ]
%array = getelementptr inbounds i32, i32 * %p, i32 %indvars
store i32 %niter, i32* %array
%indvars.next = add i32 %indvars, 2
%niter.nsub = add i32 %niter, -2
%niter.ncmp = icmp eq i32 %niter.nsub, 0
br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body
; Join point of the entry block (index 0) and the main-loop exit; runs the
; epilogue only when the remainder is non-zero.
sw.epilog.loopexit:
%indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ]
%lcmp.mod = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod, label %exit, label %for.body.epil
; Epilogue: one leftover iteration, storing the index value at p[index].
for.body.epil:
%array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr
store i32 %indvars.unr, i32* %array.epil
br label %exit
exit:
ret void
}

View File

@ -10,7 +10,7 @@ define void @test1() {
; registers that make up the i128 pair ; registers that make up the i128 pair
; CHECK: mov x0, xzr ; CHECK: mov x0, xzr
; CHECK: mov x1, xzr ; CHECK: mov x1, x0
; CHECK: bl _test2 ; CHECK: bl _test2
} }