Teach 2addr pass to be do more commuting. If both uses of a two-address instruction are killed, but the first operand has a use before and after the def, commute if the second operand does not suffer from the same issue.

%reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1                                                                                                                                     
%reg1029<def> = MOV8rr %reg1028                                                                                                                                                      
%reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>                                                                                                                            
insert => %reg1030<def> = MOV8rr %reg1028                                                                                                                                            
%reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>                                                                                                         

In this case, it might not be possible to coalesce the second MOV8rr                                                                                                                 
instruction if the first one is coalesced. So it would be profitable to                                                                                                              
commute it:                                                                                                                                                                          
%reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1                                                                                                                                     
%reg1029<def> = MOV8rr %reg1028                                                                                                                                                      
%reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>                                                                                                                            
insert => %reg1030<def> = MOV8rr %reg1029                                                                                                                                            
%reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>

llvm-svn: 62954
This commit is contained in:
Evan Cheng 2009-01-25 03:53:59 +00:00
parent 8bad1c5903
commit abda665f5f
5 changed files with 132 additions and 9 deletions

View File

@ -49,6 +49,7 @@ using namespace llvm;
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReMats, "Number of instructions re-materialized");
@ -70,6 +71,15 @@ namespace {
MachineBasicBlock *MBB, unsigned Loc,
DenseMap<MachineInstr*, unsigned> &DistanceMap);
bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
DenseMap<MachineInstr*, unsigned> &DistanceMap,
unsigned &LastDef);
bool isProfitableToCommute(unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist,
DenseMap<MachineInstr*, unsigned> &DistanceMap);
bool CommuteInstruction(MachineBasicBlock::iterator &mi,
MachineFunction::iterator &mbbi,
unsigned RegC, unsigned Dist,
@ -230,8 +240,6 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
UE = MRI->use_end(); UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
if (!UseMO.isUse())
continue;
MachineInstr *UseMI = UseMO.getParent();
MachineBasicBlock *UseMBB = UseMI->getParent();
if (UseMBB == MBB) {
@ -255,6 +263,82 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
return MBB == DefMI->getParent();
}
/// NoUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
/// def location by reference
bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
MachineBasicBlock *MBB, unsigned Dist,
DenseMap<MachineInstr*, unsigned> &DistanceMap,
unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
E = MRI->reg_end(); I != E; ++I) {
MachineOperand &MO = I.getOperand();
MachineInstr *MI = MO.getParent();
if (MI->getParent() != MBB)
continue;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
continue;
if (MO.isUse() && DI->second < LastUse)
LastUse = DI->second;
if (MO.isDef() && DI->second > LastDef)
LastDef = DI->second;
}
return !(LastUse > LastDef && LastUse < Dist);
}
/// isProfitableToReMat - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist, DenseMap<MachineInstr*, unsigned> &DistanceMap) {
// Determine if it's profitable to commute this two address instruction. In
// general, we want no uses between this instruction and the definition of
// the two-address register.
// e.g.
// %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
// %reg1029<def> = MOV8rr %reg1028
// %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
// insert => %reg1030<def> = MOV8rr %reg1028
// %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
// In this case, it might not be possible to coalesce the second MOV8rr
// instruction if the first one is coalesced. So it would be profitable to
// commute it:
// %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
// %reg1029<def> = MOV8rr %reg1028
// %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
// insert => %reg1030<def> = MOV8rr %reg1029
// %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
if (!MI->killsRegister(regC))
return false;
// Ok, we have something like:
// %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
// let's see if it's worth commuting it.
// If there is a use of regC between its last def (could be livein) and this
// instruction, then bail.
unsigned LastDefC = 0;
if (!NoUseAfterLastDef(regC, MBB, Dist, DistanceMap, LastDefC))
return false;
// If there is a use of regB between its last def (could be livein) and this
// instruction, then go ahead and make this transformation.
unsigned LastDefB = 0;
if (!NoUseAfterLastDef(regB, MBB, Dist, DistanceMap, LastDefB))
return true;
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
}
/// CommuteInstruction - Commute a two-address instruction and update the basic
/// block, distance map, and live variables if needed. Return true if it is
/// successful.
@ -419,6 +503,17 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
// If it's profitable to commute the instruction, do so.
if (TID.isCommutable() && mi->getNumOperands() >= 3) {
unsigned regC = mi->getOperand(3-si).getReg();
if (isProfitableToCommute(regB, regC, mi, mbbi, Dist, DistanceMap))
if (CommuteInstruction(mi, mbbi, regC, Dist, DistanceMap)) {
++NumAggrCommuted;
++NumCommuted;
regB = regC;
}
}
InstructionRearranged:
const TargetRegisterClass* rc = MRI->getRegClass(regA);
MachineInstr *DefMI = MRI->getVRegDef(regB);
@ -436,7 +531,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
}
MachineBasicBlock::iterator prevMi = prior(mi);
MachineBasicBlock::iterator prevMI = prior(mi);
// Update DistanceMap.
DistanceMap.insert(std::make_pair(prevMI, Dist));
DistanceMap[mi] = ++Dist;
// Update live variables for regB.
if (LV) {
@ -446,13 +544,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
varInfoB.UsedBlocks[mbbi->getNumber()] = true;
if (LV->removeVirtualRegisterKilled(regB, mi))
LV->addVirtualRegisterKilled(regB, prevMi);
LV->addVirtualRegisterKilled(regB, prevMI);
if (LV->removeVirtualRegisterDead(regB, mi))
LV->addVirtualRegisterDead(regB, prevMi);
LV->addVirtualRegisterDead(regB, prevMI);
}
DOUT << "\t\tprepend:\t"; DEBUG(prevMi->print(*cerr.stream(), &TM));
DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
// Replace all occurences of regB with regA.
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {

View File

@ -1,4 +1,4 @@
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 4
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
; rdar://5761454

View File

@ -1,4 +1,4 @@
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 76
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
; PR2539

View File

@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 19
; RUN: grep mov %t | count 15
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >

View File

@ -0,0 +1,25 @@
; RUN: llvm-as < %s | llc -march=x86 -join-cross-class-copies -stats |& \
; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
; rdar://6523745
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
define i32 @main() nounwind {
bb1.thread:
br label %bb1
bb1: ; preds = %bb1, %bb1.thread
%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; <i32> [#uses=2]
%0 = trunc i32 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
%1 = sdiv i8 %0, 2 ; <i8> [#uses=1]
%2 = sext i8 %1 to i32 ; <i32> [#uses=1]
%3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
%indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 258 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
bb2: ; preds = %bb1
ret i32 0
}
declare i32 @printf(i8*, ...) nounwind