[PowerPC] optimize conditional branch on CRSET/CRUNSET
This patch adds a check to optimize conditional branches (BC and BCn) based on a constant set by CRSET or CRUNSET. Other optimizers, such as block placement, may generate such code, and hence I do this at the very end of optimization, in the pre-emit peephole pass. A conditional branch based on a constant is either eliminated or converted into an unconditional branch. Also, the CRSET/CRUNSET is eliminated if the condition code register is not used by any instruction other than the branch being optimized. Differential Revision: https://reviews.llvm.org/D52345 llvm-svn: 343100
This commit is contained in:
parent
20b5abe23b
commit
20982f0995
|
@ -18,6 +18,7 @@
|
|||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
@ -60,6 +61,7 @@ namespace {
|
|||
return false;
|
||||
bool Changed = false;
|
||||
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
SmallVector<MachineInstr *, 4> InstrsToErase;
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
|
@ -74,6 +76,75 @@ namespace {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Eliminate conditional branch based on a constant CR bit set by
|
||||
// CRSET or CRUNSET. We eliminate the conditional branch or
|
||||
// convert it into an unconditional branch. Also, if the CR bit
|
||||
// is not used by other instructions, we eliminate CRSET as well.
|
||||
auto I = MBB.getFirstInstrTerminator();
|
||||
if (I == MBB.instr_end())
|
||||
continue;
|
||||
MachineInstr *Br = &*I;
|
||||
if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
|
||||
continue;
|
||||
MachineInstr *CRSetMI = nullptr;
|
||||
unsigned CRBit = Br->getOperand(0).getReg();
|
||||
unsigned CRReg = getCRFromCRBit(CRBit);
|
||||
bool SeenUse = false;
|
||||
MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
|
||||
for (It++; It != Er; It++) {
|
||||
if (It->modifiesRegister(CRBit, TRI)) {
|
||||
if ((It->getOpcode() == PPC::CRUNSET ||
|
||||
It->getOpcode() == PPC::CRSET) &&
|
||||
It->getOperand(0).getReg() == CRBit)
|
||||
CRSetMI = &*It;
|
||||
break;
|
||||
}
|
||||
if (It->readsRegister(CRBit, TRI))
|
||||
SeenUse = true;
|
||||
}
|
||||
if (!CRSetMI) continue;
|
||||
|
||||
unsigned CRSetOp = CRSetMI->getOpcode();
|
||||
if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
|
||||
(Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
|
||||
// Remove this branch since it cannot be taken.
|
||||
InstrsToErase.push_back(Br);
|
||||
MBB.removeSuccessor(Br->getOperand(1).getMBB());
|
||||
}
|
||||
else {
|
||||
// This conditional branch is always taken. So, remove all branches
|
||||
// and insert an unconditional branch to this branch's destination.
|
||||
MachineBasicBlock::iterator It = Br, Er = MBB.end();
|
||||
for (; It != Er && !SeenUse; It++) {
|
||||
if (It->isDebugInstr()) continue;
|
||||
assert(It->isTerminator() && "Non-terminator after a terminator");
|
||||
InstrsToErase.push_back(&*It);
|
||||
}
|
||||
if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
|
||||
ArrayRef<MachineOperand> NoCond;
|
||||
TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
|
||||
NoCond, Br->getDebugLoc());
|
||||
}
|
||||
for (auto &Succ : MBB.successors())
|
||||
if (Succ != Br->getOperand(1).getMBB()) {
|
||||
MBB.removeSuccessor(Succ);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the CRBit is not used by another instruction, we can eliminate
|
||||
// CRSET/CRUNSET instruction.
|
||||
if (!SeenUse) {
|
||||
// We need to check use of the CRBit in successors.
|
||||
for (auto &SuccMBB : MBB.successors())
|
||||
if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
|
||||
SeenUse = true;
|
||||
break;
|
||||
}
|
||||
if (!SeenUse)
|
||||
InstrsToErase.push_back(CRSetMI);
|
||||
}
|
||||
}
|
||||
for (MachineInstr *MI : InstrsToErase) {
|
||||
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
|
||||
--- |
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
declare signext i32 @callee(i32 signext) local_unnamed_addr #1
|
||||
|
||||
define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call.i = tail call signext i32 @callee(i32 signext %v)
|
||||
%tobool.i = icmp eq i32 %call.i, 0
|
||||
br i1 %tobool.i, label %if.else.i, label %if.then.i
|
||||
|
||||
if.then.i: ; preds = %entry
|
||||
%call2.i = tail call signext i32 @callee(i32 signext %call.i)
|
||||
br label %_Z6calleei.exit
|
||||
|
||||
if.else.i: ; preds = %entry
|
||||
%phitmp = icmp sgt i32 %v, -1
|
||||
br label %_Z6calleei.exit
|
||||
|
||||
_Z6calleei.exit: ; preds = %if.else.i, %if.then.i
|
||||
%call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
|
||||
%.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
|
||||
br i1 %.sink, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %_Z6calleei.exit
|
||||
%call1 = tail call signext i32 @callee(i32 signext 0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %_Z6calleei.exit
|
||||
ret i32 %call2.i.sink
|
||||
}
|
||||
|
||||
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
...
|
||||
---
|
||||
name: func
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
liveins:
|
||||
- { reg: '$x3', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 48
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 32
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0,
|
||||
callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
|
||||
debug-info-expression: '', debug-info-location: '' }
|
||||
stack:
|
||||
constants:
|
||||
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.2(0x30000000), %bb.1(0x50000000)
|
||||
liveins: $x3, $x30
|
||||
|
||||
; bc and crxor (CRUNSET) should be removed.
|
||||
; CHECK-LABEL: func
|
||||
; CHECK: # %bb.1
|
||||
; CHECK-NOT: crxor
|
||||
; CHECK-NOT: bc
|
||||
; CHECK: .LBB0_2
|
||||
|
||||
$x0 = MFLR8 implicit $lr8
|
||||
STD killed $x0, 16, $x1
|
||||
$x1 = STDU $x1, -48, $x1
|
||||
STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
|
||||
$x30 = OR8 $x3, $x3
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
|
||||
renamable $cr0 = CMPLWI renamable $r3, 0
|
||||
BCC 76, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1.if.then.i:
|
||||
successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
||||
liveins: $x3
|
||||
|
||||
renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
|
||||
renamable $cr0gt = CRUNSET implicit-def $cr0
|
||||
$x30 = OR8 killed $x3, $x3
|
||||
BC killed renamable $cr0gt, %bb.5
|
||||
|
||||
bb.4.if.then:
|
||||
successors: %bb.5(0x80000000)
|
||||
liveins: $x30
|
||||
|
||||
$x3 = LI8 0
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
|
||||
|
||||
bb.5.if.end:
|
||||
liveins: $x30
|
||||
|
||||
renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
|
||||
$x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
|
||||
$x1 = ADDI8 $x1, 48
|
||||
$x0 = LD 16, $x1
|
||||
MTLR8 killed $x0, implicit-def $lr8
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
|
||||
bb.2.if.else.i:
|
||||
successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
||||
liveins: $x30
|
||||
|
||||
renamable $cr0 = CMPWI renamable $r30, -1
|
||||
BCn killed renamable $cr0gt, %bb.4
|
||||
B %bb.5
|
||||
|
||||
...
|
|
@ -0,0 +1,132 @@
|
|||
# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
|
||||
--- |
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-unknown-linux-gnu"
|
||||
|
||||
declare signext i32 @callee(i32 signext) local_unnamed_addr #1
|
||||
|
||||
define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%call.i = tail call signext i32 @callee(i32 signext %v)
|
||||
%tobool.i = icmp eq i32 %call.i, 0
|
||||
br i1 %tobool.i, label %if.else.i, label %if.then.i
|
||||
|
||||
if.then.i: ; preds = %entry
|
||||
%call2.i = tail call signext i32 @callee(i32 signext %call.i)
|
||||
br label %_Z6calleei.exit
|
||||
|
||||
if.else.i: ; preds = %entry
|
||||
%phitmp = icmp sgt i32 %v, -1
|
||||
br label %_Z6calleei.exit
|
||||
|
||||
_Z6calleei.exit: ; preds = %if.else.i, %if.then.i
|
||||
%call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
|
||||
%.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
|
||||
br i1 %.sink, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %_Z6calleei.exit
|
||||
%call1 = tail call signext i32 @callee(i32 signext 0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %_Z6calleei.exit
|
||||
ret i32 %call2.i.sink
|
||||
}
|
||||
|
||||
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
...
|
||||
---
|
||||
name: func
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
liveins:
|
||||
- { reg: '$x3', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 48
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 32
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0,
|
||||
callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
|
||||
debug-info-expression: '', debug-info-location: '' }
|
||||
stack:
|
||||
constants:
|
||||
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.2(0x30000000), %bb.1(0x50000000)
|
||||
liveins: $x3, $x30
|
||||
|
||||
; bc should be converted into b, but creqv (CRSET) should not be removed since it is used in a successor.
|
||||
; CHECK-LABEL: func
|
||||
; CHECK: # %bb.1
|
||||
; CHECK: creqv
|
||||
; CHECK-NOT: bc
|
||||
; CHECK: .LBB0_2
|
||||
|
||||
$x0 = MFLR8 implicit $lr8
|
||||
STD killed $x0, 16, $x1
|
||||
$x1 = STDU $x1, -48, $x1
|
||||
STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
|
||||
$x30 = OR8 $x3, $x3
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
|
||||
renamable $cr0 = CMPLWI renamable $r3, 0
|
||||
BCC 76, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1.if.then.i:
|
||||
successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
||||
liveins: $x3
|
||||
|
||||
renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
|
||||
renamable $cr0gt = CRSET implicit-def $cr0
|
||||
$x30 = OR8 killed $x3, $x3
|
||||
BC killed renamable $cr0gt, %bb.5
|
||||
|
||||
bb.4.if.then:
|
||||
successors: %bb.5(0x80000000)
|
||||
liveins: $x30
|
||||
|
||||
$x3 = LI8 0
|
||||
BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
|
||||
|
||||
bb.5.if.end:
|
||||
liveins: $x30, $cr0gt
|
||||
|
||||
renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
|
||||
$x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
|
||||
$x1 = ADDI8 $x1, 48
|
||||
$x0 = LD 16, $x1
|
||||
MTLR8 killed $x0, implicit-def $lr8
|
||||
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
|
||||
|
||||
bb.2.if.else.i:
|
||||
successors: %bb.5(0x40000000), %bb.4(0x40000000)
|
||||
liveins: $x30
|
||||
|
||||
renamable $cr0 = CMPWI renamable $r30, -1
|
||||
BCn killed renamable $cr0gt, %bb.4
|
||||
B %bb.5
|
||||
|
||||
...
|
Loading…
Reference in New Issue