[PowerPC] Remove redundant TOC saves
This patch adds a peep hole optimization to remove any redundant toc save instructions added as part of the call sequence for indirect calls. It removes any toc saves within a function that are dominated by another toc save. Differential Revision: https://reviews.llvm.org/D39736 llvm-svn: 319087
This commit is contained in:
parent
eef5c23305
commit
f94d58d908
|
@ -2266,6 +2266,20 @@ static bool isZeroExtendingOp(const MachineInstr &MI) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function returns true if the input MachineInstr is a TOC save
|
||||||
|
// instruction.
|
||||||
|
bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
|
||||||
|
if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
|
||||||
|
return false;
|
||||||
|
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
|
||||||
|
unsigned StackOffset = MI.getOperand(1).getImm();
|
||||||
|
unsigned StackReg = MI.getOperand(2).getReg();
|
||||||
|
if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// We limit the max depth to track incoming values of PHIs or binary ops
|
// We limit the max depth to track incoming values of PHIs or binary ops
|
||||||
// (e.g. AND) to avoid exsessive cost.
|
// (e.g. AND) to avoid exsessive cost.
|
||||||
const unsigned MAX_DEPTH = 1;
|
const unsigned MAX_DEPTH = 1;
|
||||||
|
|
|
@ -297,6 +297,8 @@ public:
|
||||||
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
|
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
|
||||||
static int getRecordFormOpcode(unsigned Opcode);
|
static int getRecordFormOpcode(unsigned Opcode);
|
||||||
|
|
||||||
|
bool isTOCSaveMI(const MachineInstr &MI) const;
|
||||||
|
|
||||||
bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
|
bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
|
||||||
const unsigned PhiDepth) const;
|
const unsigned PhiDepth) const;
|
||||||
|
|
||||||
|
|
|
@ -29,13 +29,15 @@
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
|
||||||
#include "MCTargetDesc/PPCPredicates.h"
|
#include "MCTargetDesc/PPCPredicates.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "ppc-mi-peepholes"
|
#define DEBUG_TYPE "ppc-mi-peepholes"
|
||||||
|
|
||||||
|
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
|
||||||
|
STATISTIC(MultiTOCSaves,
|
||||||
|
"Number of functions with multiple TOC saves that must be kept");
|
||||||
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
|
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
|
||||||
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
|
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
|
||||||
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
|
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
|
||||||
|
@ -78,7 +80,9 @@ private:
|
||||||
|
|
||||||
// Perform peepholes.
|
// Perform peepholes.
|
||||||
bool eliminateRedundantCompare(void);
|
bool eliminateRedundantCompare(void);
|
||||||
|
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
|
||||||
|
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
|
||||||
|
MachineInstr *MI);
|
||||||
// Find the "true" register represented by SrcReg (following chains
|
// Find the "true" register represented by SrcReg (following chains
|
||||||
// of copies and subreg_to_reg operations).
|
// of copies and subreg_to_reg operations).
|
||||||
unsigned lookThruCopyLike(unsigned SrcReg);
|
unsigned lookThruCopyLike(unsigned SrcReg);
|
||||||
|
@ -176,10 +180,37 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function maintains a map for the pairs <TOC Save Instr, Keep>
|
||||||
|
// Each time a new TOC save is encountered, it checks if any of the exisiting
|
||||||
|
// ones are dominated by the new one. If so, it marks the exisiting one as
|
||||||
|
// redundant by setting it's entry in the map as false. It then adds the new
|
||||||
|
// instruction to the map with either true or false depending on if any
|
||||||
|
// exisiting instructions dominated the new one.
|
||||||
|
void PPCMIPeephole::UpdateTOCSaves(
|
||||||
|
std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
|
||||||
|
assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
|
||||||
|
bool Keep = true;
|
||||||
|
for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
|
||||||
|
MachineInstr *CurrInst = It->first;
|
||||||
|
// If new instruction dominates an exisiting one, mark exisiting one as
|
||||||
|
// redundant.
|
||||||
|
if (It->second && MDT->dominates(MI, CurrInst))
|
||||||
|
It->second = false;
|
||||||
|
// Check if the new instruction is redundant.
|
||||||
|
if (MDT->dominates(CurrInst, MI)) {
|
||||||
|
Keep = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Add new instruction to map.
|
||||||
|
TOCSaves[MI] = Keep;
|
||||||
|
}
|
||||||
|
|
||||||
// Perform peephole optimizations.
|
// Perform peephole optimizations.
|
||||||
bool PPCMIPeephole::simplifyCode(void) {
|
bool PPCMIPeephole::simplifyCode(void) {
|
||||||
bool Simplified = false;
|
bool Simplified = false;
|
||||||
MachineInstr* ToErase = nullptr;
|
MachineInstr* ToErase = nullptr;
|
||||||
|
std::map<MachineInstr *, bool> TOCSaves;
|
||||||
|
|
||||||
for (MachineBasicBlock &MBB : *MF) {
|
for (MachineBasicBlock &MBB : *MF) {
|
||||||
for (MachineInstr &MI : MBB) {
|
for (MachineInstr &MI : MBB) {
|
||||||
|
@ -201,6 +232,18 @@ bool PPCMIPeephole::simplifyCode(void) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PPC::STD: {
|
||||||
|
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||||
|
if (MFI.hasVarSizedObjects() ||
|
||||||
|
!MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
|
||||||
|
break;
|
||||||
|
// When encountering a TOC save instruction, call UpdateTOCSaves
|
||||||
|
// to add it to the TOCSaves map and mark any exisiting TOC saves
|
||||||
|
// it dominates as redundant.
|
||||||
|
if (TII->isTOCSaveMI(MI))
|
||||||
|
UpdateTOCSaves(TOCSaves, &MI);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PPC::XXPERMDI: {
|
case PPC::XXPERMDI: {
|
||||||
// Perform simplifications of 2x64 vector swaps and splats.
|
// Perform simplifications of 2x64 vector swaps and splats.
|
||||||
// A swap is identified by an immediate value of 2, and a splat
|
// A swap is identified by an immediate value of 2, and a splat
|
||||||
|
@ -683,6 +726,8 @@ bool PPCMIPeephole::simplifyCode(void) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Eliminate all the TOC save instructions which are redundant.
|
||||||
|
Simplified |= eliminateRedundantTOCSaves(TOCSaves);
|
||||||
// We try to eliminate redundant compare instruction.
|
// We try to eliminate redundant compare instruction.
|
||||||
Simplified |= eliminateRedundantCompare();
|
Simplified |= eliminateRedundantCompare();
|
||||||
|
|
||||||
|
@ -888,6 +933,30 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This function will iterate over the input map containing a pair of TOC save
|
||||||
|
// instruction and a flag. The flag will be set to false if the TOC save is proven
|
||||||
|
// redundant. This function will erase from the basic block all the TOC saves
|
||||||
|
// marked as redundant.
|
||||||
|
bool PPCMIPeephole::eliminateRedundantTOCSaves(
|
||||||
|
std::map<MachineInstr *, bool> &TOCSaves) {
|
||||||
|
bool Simplified = false;
|
||||||
|
int NumKept = 0;
|
||||||
|
for (auto TOCSave : TOCSaves) {
|
||||||
|
if (!TOCSave.second) {
|
||||||
|
TOCSave.first->eraseFromParent();
|
||||||
|
RemoveTOCSave++;
|
||||||
|
Simplified = true;
|
||||||
|
} else {
|
||||||
|
NumKept++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NumKept > 1)
|
||||||
|
MultiTOCSaves++;
|
||||||
|
|
||||||
|
return Simplified;
|
||||||
|
}
|
||||||
|
|
||||||
// If multiple conditional branches are executed based on the (essentially)
|
// If multiple conditional branches are executed based on the (essentially)
|
||||||
// same comparison, we merge compare instructions into one and make multiple
|
// same comparison, we merge compare instructions into one and make multiple
|
||||||
// conditional branches on this comparison.
|
// conditional branches on this comparison.
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||||
|
define signext i32 @test1(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test1:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK-NOT: std 2, 24(1)
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%call1 = tail call signext i32 %Func2(i32 signext %i)
|
||||||
|
%add2 = add nsw i32 %call1, %call
|
||||||
|
ret i32 %add2
|
||||||
|
}
|
||||||
|
|
||||||
|
define signext i32 @test2(i32 signext %i, i32 signext %j, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test2:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK-NOT: std 2, 24(1)
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%tobool = icmp eq i32 %j, 0
|
||||||
|
br i1 %tobool, label %if.end, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
%call1 = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%add2 = add nsw i32 %call1, %call
|
||||||
|
%call3 = tail call signext i32 %Func2(i32 signext %i)
|
||||||
|
%add4 = add nsw i32 %add2, %call3
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %entry, %if.then
|
||||||
|
%Sum.0 = phi i32 [ %add4, %if.then ], [ %call, %entry ]
|
||||||
|
%call5 = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%add6 = add nsw i32 %call5, %Sum.0
|
||||||
|
ret i32 %add6
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check for multiple TOC saves with if then else where neither dominates the other.
|
||||||
|
define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||||
|
; CHECK-LABEL: test3:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK-NOT: std 2, 24(1)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i32 %i, 0
|
||||||
|
br i1 %tobool, label %if.else, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.else: ; preds = %entry
|
||||||
|
%call1 = tail call signext i32 %Func2(i32 signext 0)
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %if.else, %if.then
|
||||||
|
%Sum.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
|
||||||
|
%call3 = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%add4 = add nsw i32 %call3, %Sum.0
|
||||||
|
ret i32 %add4
|
||||||
|
}
|
||||||
|
|
||||||
|
define signext i32 @test4(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||||
|
; CHECK-LABEL: test4:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK-NOT: std 2, 24(1)
|
||||||
|
|
||||||
|
entry:
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%tobool = icmp eq i32 %i, 0
|
||||||
|
br i1 %tobool, label %if.else, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
%call1 = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.else: ; preds = %entry
|
||||||
|
%call3 = tail call signext i32 %Func2(i32 signext 0)
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %if.else, %if.then
|
||||||
|
%call1.pn = phi i32 [ %call1, %if.then ], [ %call3, %if.else ]
|
||||||
|
%Sum.0 = add nsw i32 %call1.pn, %call
|
||||||
|
ret i32 %Sum.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check for multiple TOC saves with if then where neither is redundant.
|
||||||
|
define signext i32 @test5(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture readnone %Func2) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test5:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
|
||||||
|
%tobool = icmp eq i32 %i, 0
|
||||||
|
br i1 %tobool, label %if.end, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
br label %if.end
|
||||||
|
|
||||||
|
if.end: ; preds = %entry, %if.then
|
||||||
|
%Sum.0 = phi i32 [ %call, %if.then ], [ 0, %entry ]
|
||||||
|
%call1 = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
%add2 = add nsw i32 %call1, %Sum.0
|
||||||
|
ret i32 %add2
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check for multiple TOC saves if there are dynamic allocations on the stack.
|
||||||
|
define signext i32 @test6(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test6:
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
; CHECK: std 2, 24(1)
|
||||||
|
|
||||||
|
%conv = sext i32 %i to i64
|
||||||
|
%0 = alloca i8, i64 %conv, align 16
|
||||||
|
%1 = bitcast i8* %0 to i32*
|
||||||
|
%call = tail call signext i32 %Func(i32 signext %i)
|
||||||
|
call void @useAlloca(i32* nonnull %1, i32 signext %call)
|
||||||
|
%call1 = call signext i32 %Func2(i32 signext %i)
|
||||||
|
%add2 = add nsw i32 %call1, %call
|
||||||
|
ret i32 %add2
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @useAlloca(i32*, i32 signext)
|
Loading…
Reference in New Issue