AMDGPU: Move hazard avoidance out of waitcnt pass.

This is mostly moving VMEM clause breaking into
the hazard recognizer. Also move another hazard
currently handled in the waitcnt pass.

Also stops breaking clauses unless xnack is enabled.

llvm-svn: 318557
This commit is contained in:
Matt Arsenault 2017-11-17 21:35:32 +00:00
parent c0a81071d3
commit a41351e37c
7 changed files with 693 additions and 97 deletions

View File

@ -806,10 +806,14 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasReadM0Hazard() const {
bool hasReadM0MovRelInterpHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasReadM0SendMsgHazard() const {
return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
unsigned getKernArgSegmentSize(const MachineFunction &MF,
unsigned ExplictArgBytes) const;

View File

@ -87,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) {
}
}
static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::S_SENDMSG:
case AMDGPU::S_SENDMSGHALT:
case AMDGPU::S_TTRACEDATA:
return true;
default:
// TODO: GDS
return false;
}
}
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
@ -100,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
return NoopHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@ -124,7 +139,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
if (ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
@ -144,26 +164,20 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
if (SIInstrInfo::isVALU(*MI)) {
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVALU(*MI))
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
if (SIInstrInfo::isDPP(*MI))
WaitStates = std::max(WaitStates, checkDPPHazards(MI));
if (SIInstrInfo::isDPP(*MI))
WaitStates = std::max(WaitStates, checkDPPHazards(MI));
if (isDivFMas(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
if (isDivFMas(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
if (isRWLane(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
if (TII.isVINTRP(*MI))
WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
return WaitStates;
}
if (isRWLane(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
if (isSGetReg(MI->getOpcode()))
return std::max(WaitStates, checkGetRegHazards(MI));
@ -174,7 +188,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (isRFE(MI->getOpcode()))
return std::max(WaitStates, checkRFEHazards(MI));
if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
isSMovRel(MI->getOpcode())))
return std::max(WaitStates, checkReadM0Hazards(MI));
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
return WaitStates;
@ -282,12 +300,14 @@ void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
addRegsToSet(TRI, MI.uses(), ClauseUses);
}
int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
// SMEM soft clause are only present on VI+, and only matter if xnack is
// enabled.
if (!ST.isXNACKEnabled())
return 0;
bool IsSMRD = TII.isSMRD(*MEM);
resetClause();
// A soft-clause is any group of consecutive SMEM instructions. The
@ -303,7 +323,10 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
for (MachineInstr *MI : EmittedInstrs) {
// When we hit a non-SMEM instruction then we have passed the start of the
// clause and we can stop.
if (!MI || !SIInstrInfo::isSMRD(*MI))
if (!MI)
break;
if (IsSMRD != SIInstrInfo::isSMRD(*MI))
break;
addClauseInst(*MI);
@ -312,13 +335,13 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
if (ClauseDefs.none())
return 0;
// FIXME: When we support stores, we need to make sure not to put loads and
// stores in the same clause if they use the same address. For now, just
// start a new clause whenever we see a store.
if (SMEM->mayStore())
// We need to make sure not to put loads and stores in the same clause if they
// use the same address. For now, just start a new clause whenever we see a
// store.
if (MEM->mayStore())
return 1;
addClauseInst(*SMEM);
addClauseInst(*MEM);
// If the set of defs and uses intersect then we cannot add this instruction
// to the clause, so we have a hazard.
@ -329,7 +352,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
int WaitStatesNeeded = 0;
WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
WaitStatesNeeded = checkSoftClauseHazards(SMRD);
// This SMRD hazard only affects SI.
if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
@ -369,18 +392,15 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
}
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
const SIInstrInfo *TII = ST.getInstrInfo();
if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
const SIRegisterInfo &TRI = TII->getRegisterInfo();
int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
// A read of an SGPR by a VMEM instruction requires 5 wait states when the
// SGPR was written by a VALU Instruction.
int VmemSgprWaitStates = 5;
int WaitStatesNeeded = 0;
auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
const int VmemSgprWaitStates = 5;
auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
for (const MachineOperand &Use : VMEM->uses()) {
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
@ -598,11 +618,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
}
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
if (!ST.hasReadM0Hazard())
return 0;
const SIInstrInfo *TII = ST.getInstrInfo();
int SMovRelWaitStates = 1;
const int SMovRelWaitStates = 1;
auto IsHazardFn = [TII] (MachineInstr *MI) {
return TII->isSALU(*MI);
};

View File

@ -58,7 +58,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
[](MachineInstr *) { return true; });
int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
int checkSMEMSoftClauseHazards(MachineInstr *SMEM);
int checkSoftClauseHazards(MachineInstr *SMEM);
int checkSMRDHazards(MachineInstr *SMRD);
int checkVMEMHazards(MachineInstr* VMEM);
int checkDPPHazards(MachineInstr *DPP);

View File

@ -1522,8 +1522,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets->dump();
});
bool InsertNOP = false;
// Walk over the instructions.
for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
Iter != E;) {
@ -1624,58 +1622,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
VCCZBugHandledSet.insert(&Inst);
}
if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// This avoids a s_nop after a waitcnt has just been inserted.
if (!SWaitInst && InsertNOP) {
BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
}
InsertNOP = false;
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
// The temporary workaround is to break the clauses with S_NOP.
//
// The proper solution would be to allocate registers such that all source
// and destination registers don't overlap, e.g. this is illegal:
// r0 = load r2
// r2 = load r0
bool IsSMEM = false;
bool IsVMEM = false;
if (TII->isSMRD(Inst))
IsSMEM = true;
else if (TII->usesVM_CNT(Inst))
IsVMEM = true;
++Iter;
if (Iter == E)
break;
MachineInstr &Next = *Iter;
// TODO: How about consecutive SMEM instructions?
// The comments above says break the clause but the code does not.
// if ((TII->isSMRD(next) && isSMEM) ||
if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM &&
// TODO: Enable this check when hasSoftClause is upstreamed.
// ST->hasSoftClauses() &&
ST->isXNACKEnabled()) {
// Insert a NOP to break the clause.
InsertNOP = true;
continue;
}
// There must be "S_NOP 0" between an instruction writing M0 and
// S_SENDMSG.
if ((Next.getOpcode() == AMDGPU::S_SENDMSG ||
Next.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
Inst.definesRegister(AMDGPU::M0))
InsertNOP = true;
continue;
}
++Iter;
}

View File

@ -0,0 +1,580 @@
# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x1
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x3
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x4
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Reuse of same input pointer is OK
name: trivial_clause_load_flat4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
name: flat_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 32-bit load partially clobbers its own ptr reg
name: flat_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
; GCN: %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# 64-bit load clobbers its own ptr reg
name: flat_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: flat_load8_overwrite_ptr
; GCN: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
name: break_clause_at_max_clause_size_flat_load4
body: |
bb.0:
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
; GCN-NEXT: S_ENDPGM
%vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18
S_ENDPGM
...
---
name: break_clause_simple_load_flat4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_simple_load_flat16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The clause is broken by the waitcnt inserted at the end of the
# block, so no nop is needed.
name: break_clause_block_boundary_load_flat8_ptr
body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
bb.0:
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
bb.1:
%vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The load clobbers the pointer of the store, so it needs to break.
name: break_clause_store_load_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
name: break_clause_store_load_into_data_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Regular VALU instruction breaks clause, no nop needed
name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = V_MOV_B32_e32 0, implicit %exec
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Regular SALU instruction breaks clause, no nop needed
name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr8 = S_MOV_B32 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr8 = S_MOV_B32 0
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# FIXME: Should this be handled?
name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5
%vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
name: break_clause_simple_load_mubuf_offen_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# BUFFER instructions overwriting their own inputs is supposedly OK.
name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
; GCN: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr1 = V_MOV_B32_e32 0, implicit %exec
; GCN-NEXT: %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr1 = V_MOV_B32_e32 0, implicit %exec
%vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec
S_ENDPGM
...
---
# Break a clause from interference between mubuf and flat instructions
name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
# Break a clause from interference between mubuf and flat instructions
# GCN-LABEL: name: break_clause_mubuf_load_flat_load
# GCN: bb.0:
# GCN-NEXT: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4
# XNACK-NEXT: S_NOP 0
# GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3
# GCN-NEXT: S_ENDPGM
name: break_clause_mubuf_load_flat_load
body: |
bb.0:
%vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_rtn_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
%vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_nortn_ptr_load_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
; GCN: FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: S_ENDPGM
FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr
%vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
name: break_clause_atomic_rtn_into_ptr_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
S_ENDPGM
...
---
name: break_clause_atomic_nortn_ptr_load_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
; GCN-NEXT: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec
%vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# Make sure there is no assert on mubuf instructions which do not have
# vaddr, and don't add register to track.
name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
; GCN-NEXT: S_ENDPGM
%vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
%vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
S_ENDPGM
...
---
# Loads and stores using different addresses theoretically does not
# need a nop
name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...
---
# Loads and stores using the same address needs a nop.
name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr
%vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
S_ENDPGM
...

View File

@ -437,22 +437,22 @@ body: |
# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32
# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P2_F32
# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32_16bank
# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_MOV_F32
name: v_interp

View File

@ -0,0 +1,49 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,CI %s
# RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,SI %s
---
name: m0_sendmsg
body: |
; GCN-LABEL: name: m0_sendmsg
; GCN: %m0 = S_MOV_B32 -1
; VI-NEXT: S_NOP 0
; GFX9-NEXT: S_NOP 0
; GCN-NEXT: S_SENDMSG 3, implicit %exec, implicit %m0
bb.0:
%m0 = S_MOV_B32 -1
S_SENDMSG 3, implicit %exec, implicit %m0
S_ENDPGM
...
---
name: m0_sendmsghalt
body: |
; GCN-LABEL: name: m0_sendmsghalt
; GCN: %m0 = S_MOV_B32 -1
; VI-NEXT: S_NOP 0
; GFX9-NEXT: S_NOP 0
; GCN-NEXT: S_SENDMSGHALT 3, implicit %exec, implicit %m0
bb.0:
%m0 = S_MOV_B32 -1
S_SENDMSGHALT 3, implicit %exec, implicit %m0
S_ENDPGM
...
---
name: m0_ttracedata
body: |
; GCN-LABEL: name: m0_ttracedata
; GCN: %m0 = S_MOV_B32 -1
; VI-NEXT: S_NOP 0
; GFX9-NEXT: S_NOP 0
; GCN-NEXT: S_TTRACEDATA implicit %m0
bb.0:
%m0 = S_MOV_B32 -1
S_TTRACEDATA implicit %m0
S_ENDPGM
...