R600: Do not predicated basic block with multiple alu clause

Test is not included as it is several 1000 lines long.
To test this functionnality, a test case must generate at least 2 ALU clauses,
where an ALU clause is ~110 instructions long.

NOTE: This is a candidate for the stable branch.
llvm-svn: 185943
This commit is contained in:
Vincent Lejeune 2013-07-09 15:03:33 +00:00
parent b8aac8d720
commit ce499744b3
7 changed files with 65 additions and 8 deletions

View File

@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
}
bool AMDGPUPassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createR600EmitClauseMarkers(*TM));
}
addPass(&IfConverterID);
return false;
}
@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
addPass(createR600Packetizer(*TM));

View File

@ -256,6 +256,7 @@ private:
ClauseContent.push_back(MILit);
}
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, ClauseContent);
}
@ -276,6 +277,7 @@ private:
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))

View File

@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
int Address;
unsigned OccupiedDwords(MachineInstr *MI) const {
switch (MI->getOpcode()) {
@ -159,7 +160,7 @@ private:
}
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
bool PushBeforeModifier = false;
@ -199,20 +200,25 @@ private:
unsigned Opcode = PushBeforeModifier ?
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
.addImm(0) // ADDR
// We don't use the ADDR field until R600ControlFlowFinalizer pass, where
// it is safe to assume it is 0. However if we always put 0 here, the ifcvt
// pass may assume that identical ALU clause starter at the beginning of a
// true and false branch can be factorized which is not the case.
.addImm(Address++) // ADDR
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
.addImm(KCacheBanks.empty()?0:2) // KM0
.addImm((KCacheBanks.size() < 2)?0:2) // KM1
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
.addImm(AluInstCount); // COUNT
.addImm(AluInstCount) // COUNT
.addImm(1); // Enabled
return I;
}
public:
R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
TII(0) { }
TII(0), Address(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());

View File

@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
};
}
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
It != E; ++It) {
if (It->getOpcode() == AMDGPU::CF_ALU ||
It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
return llvm::prior(It.base());
}
return MBB.end();
}
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 1;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 2;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 2;
}
}
@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
} else if (MI->getOpcode() == AMDGPU::CF_ALU) {
// If the clause start in the middle of MBB then the MBB has more
// than a single clause, unable to predicate several clauses.
if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
return false;
// TODO: We don't support KC merging atm
if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
return false;
return true;
} else if (isVector(*MI)) {
return false;
} else {
@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
int PIdx = MI->findFirstPredOperandIdx();
if (MI->getOpcode() == AMDGPU::CF_ALU) {
MI->getOperand(8).setImm(0);
return true;
}
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());

View File

@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
i32imm:$COUNT),
i32imm:$COUNT, i32imm:$Enabled),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {

View File

@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
if (MI->isKill()) {
if (MI->isKill() ||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: JUMP @3
; CHECK: JUMP @7
; CHECK: EXPORT
; CHECK-NOT: EXPORT