[AMDGPU] gfx1010 hazard recognizer

Differential Revision: https://reviews.llvm.org/D61536

llvm-svn: 359961
This commit is contained in:
Stanislav Mekhanoshin 2019-05-04 04:30:57 +00:00
parent cf9bd8ade7
commit 51d1415a16
7 changed files with 1172 additions and 3 deletions

View File

@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
@ -133,6 +134,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
&& checkVMEMHazards(MI) > 0)
return NoopHazard;
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
return NoopHazard;
if (ST.hasNoDataDepHazard())
return NoHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
return NoopHazard;
@ -181,6 +188,12 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
IsHazardRecognizerMode = true;
CurrCycleInstr = MI;
unsigned W = PreEmitNoopsCommon(MI);
fixVMEMtoScalarWriteHazards(MI);
fixSMEMtoVectorWriteHazards(MI);
fixVcmpxExecWARHazard(MI);
fixLdsBranchVmemWARHazard(MI);
CurrCycleInstr = nullptr;
return W;
}
@ -191,12 +204,18 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
if (SIInstrInfo::isVALU(*MI))
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
if (ST.hasNSAtoVMEMBug())
WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
if (ST.hasNoDataDepHazard())
return WaitStates;
if (SIInstrInfo::isVALU(*MI))
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isDPP(*MI))
WaitStates = std::max(WaitStates, checkDPPHazards(MI));
@ -775,3 +794,243 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
SMovRelWaitStates);
}
/// Insert a V_NOP before \p MI when a scalar write would race with an earlier
/// vector-memory read of the same register.
///
/// Only applies when the subtarget reports the VMEM-to-scalar-write hazard and
/// \p MI is a SALU or SMRD instruction with at least one explicit def.
///
/// \returns true if a V_NOP_e32 was inserted in front of \p MI.
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  const bool IsScalarWriter =
      SIInstrInfo::isSALU(*MI) || SIInstrInfo::isSMRD(*MI);
  if (!IsScalarWriter || MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *RegInfo = ST.getRegisterInfo();

  // An earlier VMEM/DS/FLAT instruction is hazardous if it uses any register
  // that MI defines. An implicit use of EXEC does not count.
  auto ReadsScalarDef = [RegInfo, MI](MachineInstr *Prev) {
    const bool IsVectorMem = SIInstrInfo::isVMEM(*Prev) ||
                             SIInstrInfo::isDS(*Prev) ||
                             SIInstrInfo::isFLAT(*Prev);
    if (!IsVectorMem)
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      const MachineOperand *Use =
          Prev->findRegisterUseOperand(Def.getReg(), false, RegInfo);
      if (Use && !(Use->isImplicit() && Use->getReg() == AMDGPU::EXEC))
        return true;
    }
    return false;
  };

  // The search stops at any VALU instruction or at an S_WAITCNT whose
  // immediate is zero.
  auto HazardCleared = [](MachineInstr *Prev, int) {
    if (!Prev)
      return false;
    if (SIInstrInfo::isVALU(*Prev))
      return true;
    return Prev->getOpcode() == AMDGPU::S_WAITCNT &&
           Prev->getOperand(0).getImm() == 0;
  };

  // The maximum int value is the "no hazard found" result of the search.
  if (::getWaitStatesSince(ReadsScalarDef, MI, HazardCleared) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          InstrInfo->get(AMDGPU::V_NOP_e32));
  return true;
}
/// Insert `s_mov_b32 null, 0` before \p MI when a VALU instruction would
/// overwrite an SGPR that an earlier SMEM instruction still reads.
///
/// Only applies when the subtarget reports the SMEM-to-vector-write hazard and
/// \p MI is a VALU instruction that writes an SGPR, either through a named
/// scalar destination operand or through an implicit def.
///
/// \returns true if the S_MOV_B32 to SGPR_NULL was inserted in front of \p MI.
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  // V_READLANE / V_READFIRSTLANE are looked up through their "vdst" operand;
  // every other VALU instruction is looked up through "sdst".
  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    // No named scalar destination: fall back to the first implicit SGPR def.
    // Iterate by reference so SDST points at the operand owned by MI.
    // Iterating by value would leave SDST pointing at a loop-local copy that
    // is destroyed before SDST->getReg() is read below.
    for (const MachineOperand &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();

  // Hazard source: an earlier SMEM (SMRD) instruction that reads the SGPR
  // MI is about to write.
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
  // between any at risk SMEM and any SALU dependent on the SMEM results.
  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
    if (!MI || !TII->isSALU(*MI))
      return false;
    // SOPP instructions and the SALU opcodes listed below do not end the
    // search; any other SALU instruction does.
    if (TII->isSOPP(*MI))
      return false;
    switch (MI->getOpcode()) {
    case AMDGPU::S_SETVSKIP:
    case AMDGPU::S_VERSION:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
      return false;
    default:
      return true;
    }
  };

  // The maximum int value is the "no hazard found" result of the search.
  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
/// Insert `s_waitcnt_depctr 0xfffe` before \p MI when a VALU write of EXEC
/// follows a non-VALU read of EXEC.
///
/// Only applies when the subtarget reports the v_cmpx EXEC WAR hazard and
/// \p MI is a VALU instruction that modifies EXEC.
///
/// \returns true if the S_WAITCNT_DEPCTR was inserted in front of \p MI.
bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard())
    return false;
  if (!SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *RegInfo = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, RegInfo))
    return false;

  // Hazard source: any non-VALU instruction that reads EXEC.
  auto NonVALUReadsExec = [RegInfo](MachineInstr *Prev) {
    return !SIInstrInfo::isVALU(*Prev) &&
           Prev->readsRegister(AMDGPU::EXEC, RegInfo);
  };

  const SIInstrInfo *InstrInfo = ST.getInstrInfo();

  // The search stops at a VALU instruction that writes an SGPR (named sdst
  // operand or implicit SGPR def), or at an S_WAITCNT_DEPCTR whose immediate
  // has all of the 0xfffe bits set.
  auto HazardCleared = [InstrInfo, RegInfo](MachineInstr *Prev, int) {
    if (!Prev)
      return false;

    if (SIInstrInfo::isVALU(*Prev)) {
      if (InstrInfo->getNamedOperand(*Prev, AMDGPU::OpName::sdst))
        return true;
      for (const MachineOperand &Op : Prev->implicit_operands()) {
        if (!Op.isDef())
          continue;
        if (RegInfo->isSGPRClass(RegInfo->getPhysRegClass(Op.getReg())))
          return true;
      }
    }

    return Prev->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           (Prev->getOperand(0).getImm() & 0xfffe) == 0xfffe;
  };

  // The maximum int value is the "no hazard found" result of the search.
  if (::getWaitStatesSince(NonVALUReadsExec, MI, HazardCleared) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          InstrInfo->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xfffe);
  return true;
}
/// Insert `s_waitcnt_vscnt null, 0` before \p MI when an LDS access and a
/// VMEM / segment-specific FLAT access are separated by a branch.
///
/// \p MI must itself be one of the two memory kinds. The fix is needed when a
/// branch is found behind \p MI and a memory instruction of the *other* kind
/// is found behind that branch, with no instruction in between that ends
/// either search.
///
/// \returns true if the S_WAITCNT_VSCNT was inserted in front of \p MI.
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  // Classify an instruction: 1 = DS, 2 = VMEM or segment-specific FLAT,
  // 0 = not relevant to this hazard.
  auto MemKindOf = [](const MachineInstr *Inst) {
    if (SIInstrInfo::isDS(*Inst))
      return 1;
    if (SIInstrInfo::isVMEM(*Inst) || SIInstrInfo::isSegmentSpecificFLAT(*Inst))
      return 2;
    return 0;
  };

  // True for "s_waitcnt_vscnt null, 0".
  auto IsVscntNullZero = [](const MachineInstr *Inst) {
    return Inst->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
           Inst->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
           Inst->getOperand(1).getImm() == 0;
  };

  const int ThisKind = MemKindOf(MI);
  if (ThisKind == 0)
    return false;

  // Outer search, looking for the branch: stops at any DS/VMEM/segment FLAT
  // instruction or at "s_waitcnt_vscnt null, 0".
  auto OuterExpired = [&MemKindOf, &IsVscntNullZero](MachineInstr *Prev, int) {
    if (!Prev)
      return false;
    return MemKindOf(Prev) != 0 || IsVscntNullZero(Prev);
  };

  // A branch is hazardous only if a second search, started from the branch,
  // reaches a memory instruction of the other kind.
  auto BranchAfterOtherKind = [ThisKind, &MemKindOf,
                               &IsVscntNullZero](MachineInstr *Branch) {
    if (!Branch->isBranch())
      return false;

    auto IsOtherKind = [ThisKind, &MemKindOf](MachineInstr *Prev) {
      const int PrevKind = MemKindOf(Prev);
      return PrevKind != 0 && PrevKind != ThisKind;
    };

    // Inner search stops at a memory instruction of the same kind as MI or
    // at "s_waitcnt_vscnt null, 0".
    auto InnerExpired = [ThisKind, &MemKindOf,
                         &IsVscntNullZero](MachineInstr *Prev, int) {
      if (!Prev)
        return false;
      if (MemKindOf(Prev) == ThisKind)
        return true;
      return IsVscntNullZero(Prev);
    };

    return ::getWaitStatesSince(IsOtherKind, Branch, InnerExpired) !=
           std::numeric_limits<int>::max();
  };

  // The maximum int value is the "no hazard found" result of the search.
  if (::getWaitStatesSince(BranchAfterOtherKind, MI, OuterExpired) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          InstrInfo->get(AMDGPU::S_WAITCNT_VSCNT))
      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
      .addImm(0);
  return true;
}
/// Compute the wait states still required between an NSA-encoded image
/// instruction and the buffer instruction \p MI.
///
/// Returns 0 when the subtarget lacks the NSA-to-VMEM bug, when \p MI is not
/// a MUBUF/MTBUF instruction, or when \p MI has no `offset` operand with
/// either of the bits masked by 6 set. Otherwise the result is the one
/// required wait state minus the wait states since the hazardous instruction.
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  const int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  const bool IsBufferOp =
      SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isMTBUF(*MI);
  if (!IsBufferOp)
    return 0;

  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
  const MachineOperand *OffsetOp =
      InstrInfo->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!OffsetOp)
    return 0;
  if ((OffsetOp->getImm() & 6) == 0)
    return 0;

  // Hazard source: a MIMG instruction using the gfx10 NSA encoding whose
  // encoded size is at least 16 bytes.
  auto IsLongNSAImage = [InstrInfo](MachineInstr *Prev) {
    if (!SIInstrInfo::isMIMG(*Prev))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Prev->getOpcode());
    if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
      return false;
    return InstrInfo->getInstSizeInBytes(*Prev) >= 16;
  };

  return NSAtoVMEMWaitStates -
         getWaitStatesSince(IsLongNSAImage, NSAtoVMEMWaitStates);
}

View File

@ -79,6 +79,12 @@ private:
int checkInlineAsmHazards(MachineInstr *IA);
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
// Returns the wait states still needed before the MUBUF/MTBUF instruction MI
// when it follows an NSA-encoded MIMG instruction of at least 16 bytes;
// returns 0 when the check does not apply.
int checkNSAtoVMEMHazard(MachineInstr *MI);
// The fix* helpers below each insert one instruction directly before MI when
// their hazard is detected, and return true if they inserted it.
// Inserts V_NOP_e32 before a SALU/SMRD def of a register read by an earlier
// VMEM/DS/FLAT instruction.
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
// Inserts S_MOV_B32 to SGPR_NULL before a VALU write of an SGPR read by an
// earlier SMRD instruction.
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
// Inserts S_WAITCNT_DEPCTR 0xfffe before a VALU write of EXEC that follows a
// non-VALU read of EXEC.
bool fixVcmpxExecWARHazard(MachineInstr *MI);
// Inserts S_WAITCNT_VSCNT null, 0 before a DS or VMEM/segment-specific FLAT
// instruction when the other kind is found behind an earlier branch.
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.

View File

@ -0,0 +1,276 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: hazard_lds_branch_buf
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: hazard_lds_branch_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_buf_branch_lds
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: DS_READ_B32
---
name: hazard_buf_branch_lds
body: |
bb.0:
successors: %bb.1
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_lds
# GCN: bb.1:
# GCN-NEXT: DS_READ_B32
---
name: no_hazard_lds_branch_lds
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_buf_branch_buf
# GCN: bb.1:
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_buf_branch_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_buf_fallthrough
# GCN: bb.1:
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_branch_buf_fallthrough
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_buf_samebb
# GCN: DS_READ_B32
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_branch_buf_samebb
body: |
bb.0:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_lds_branch_buf_loop
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: DS_READ_B32
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: hazard_lds_branch_buf_loop
body: |
bb.0:
successors: %bb.0
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.0
...
# GCN-LABEL: name: single_hazard_lds_branch_buf
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: single_hazard_lds_branch_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_lds_buf
# GCN: bb.1:
# GCN-NEXT: DS_READ_B32
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_branch_lds_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_buf_branch_buf
# GCN: bb.1:
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_buf_branch_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_lds_branch_vscnt_1_buf
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: hazard_lds_branch_vscnt_1_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
S_WAITCNT_VSCNT undef $sgpr_null, 1
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_vscnt_0_buf
# GCN: bb.1:
# GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_branch_vscnt_0_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
S_WAITCNT_VSCNT undef $sgpr_null, 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_lds_branch_vscnt_s0_buf
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: hazard_lds_branch_vscnt_s0_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
S_WAITCNT_VSCNT undef $sgpr0, 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_vscnt_0_branch_buf
# GCN: bb.1:
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: no_hazard_lds_vscnt_0_branch_buf
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_BRANCH %bb.1
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_lds_branch_global
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: GLOBAL_LOAD_DWORD
---
name: hazard_lds_branch_global
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_lds_branch_scratch
# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: SCRATCH_LOAD_DWORD
---
name: hazard_lds_branch_scratch
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_lds_branch_flat
# GCN: bb.1:
# GCN-NEXT: FLAT_LOAD_DWORD
---
name: no_hazard_lds_branch_flat
body: |
bb.0:
successors: %bb.1
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$vgpr1 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...

View File

@ -0,0 +1,61 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: hazard_image_sample_d_buf_off6
# GCN: IMAGE_SAMPLE
# GCN-NEXT: S_NOP 0
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
---
name: hazard_image_sample_d_buf_off6
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: no_hazard_image_sample_d_buf_off1
# GCN: IMAGE_SAMPLE
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
---
name: no_hazard_image_sample_d_buf_off1
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: no_hazard_image_sample_d_buf_far
# GCN: IMAGE_SAMPLE
# GCN-NEXT: V_NOP_e32
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
---
name: no_hazard_image_sample_d_buf_far
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
V_NOP_e32 implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
...
# Non-NSA
# GCN-LABEL: name: no_hazard_image_sample_v4_v2_buf_off6
# GCN: IMAGE_SAMPLE
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
---
name: no_hazard_image_sample_v4_v2_buf_off6
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
...
# Less than 4 dwords
# GCN-LABEL: name: no_hazard_image_sample_v4_v3_buf_off6
# GCN: IMAGE_SAMPLE
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
---
name: no_hazard_image_sample_v4_v3_buf_off6
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
...

View File

@ -0,0 +1,193 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: hazard_smem_war
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_no_hazard
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_ADD_U32
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_no_hazard
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$sgpr3 = S_ADD_U32 $sgpr4, $sgpr5, implicit-def $scc
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_related_clause
# GCN: S_LOAD_DWORD_IMM
# GCN: S_WAITCNT
# GCN: S_ADD_U32
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_related_clause
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_WAITCNT 0
$sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_branch
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_branch
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
successors: %bb.1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_BRANCH %bb.1
bb.1:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_cbranch
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: S_ENDPGM 0
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_cbranch
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
successors: %bb.1, %bb.2
$vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
bb.1:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
bb.2:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_cbranch_carry
# GCN: S_AND_B64
# GCN: S_LOAD_DWORD_IMM
# GCN: S_CBRANCH_VCCZ
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN-NEXT: S_ENDPGM 0
# GCN-NOT: $sgpr_null = S_MOV_B32 0
# GCN: V_CMP_EQ_F32
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_cbranch_carry
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
successors: %bb.1, %bb.2
$vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
bb.1:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
bb.2:
successors: %bb.3
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
bb.3:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_backedge
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
# GCN: S_LOAD_DWORD_IMM
---
name: hazard_smem_war_backedge
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
successors: %bb.1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
bb.1:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_BRANCH %bb.0
...
# GCN-LABEL: name: hazard_smem_war_impdef
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_CMP_EQ_F32
---
name: hazard_smem_war_impdef
body: |
bb.0:
liveins: $vcc, $vgpr0
$sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0, 0
V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_readlane
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_READLANE_B32
---
name: hazard_smem_war_readlane
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr3, $vgpr0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$sgpr0 = V_READLANE_B32 $vgpr0, $sgpr3
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_smem_war_readfirstlane
# GCN: S_LOAD_DWORD_IMM
# GCN: $sgpr_null = S_MOV_B32 0
# GCN-NEXT: V_READFIRSTLANE_B32
---
name: hazard_smem_war_readfirstlane
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
S_ENDPGM 0
...

View File

@ -0,0 +1,164 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-skips,post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec_lo
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_vcmpx_smov_exec
# GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec
# GCN-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0_sgpr1 = S_MOV_B64 $exec
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_vmov_exec_lo
# GCN: $vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_vmov_exec_lo
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_valu_impuse_exec
# GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_valu_impuse_exec
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: $vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
$vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
S_WAITCNT_DEPCTR 65534
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN-NEXT: S_WAITCNT_DEPCTR 65535
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
S_WAITCNT_DEPCTR 65535
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...
# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_effe
# GCN: $sgpr0 = S_MOV_B32 $exec_lo
# GCN: S_WAITCNT_DEPCTR 65534
# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
---
name: hazard_vcmpx_smov_exec_lo_depctr_effe
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 $exec_lo
S_WAITCNT_DEPCTR 61438
SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
S_BRANCH %bb.1
bb.1:
S_ENDPGM 0
...

View File

@ -0,0 +1,210 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
# GCN-LABEL: name: vmem_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_smem_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: V_NOP
# GCN-NEXT: S_LOAD_DWORD_IMM
---
name: vmem_smem_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
...
# GCN-LABEL: name: vmem_snop_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_NOP
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_snop_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_NOP 0
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_valu_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: V_ADD_F32
# GCN-NEXT: S_MOV_B32
---
name: vmem_valu_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_swait0_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_MOV_B32
---
name: vmem_swait0_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_WAITCNT 0
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_swait_any_write_sgpr
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_swait_any_write_sgpr
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_WAITCNT 1
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_exec_impread
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_MOV_B64
---
name: vmem_write_exec_impread
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: vmem_write_exec_expread
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B64
---
name: vmem_write_exec_expread
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, implicit $exec
$exec = S_MOV_B64 7
...
# GCN-LABEL: name: ds_write_m0
# GCN: DS_READ_B32
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: ds_write_m0
body: |
bb.0:
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
$m0 = S_MOV_B32 7
...
# GCN-LABEL: name: vmem_write_sgpr_fall_through
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_fall_through
body: |
bb.0:
successors: %bb.1
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
bb.1:
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch
body: |
bb.0:
successors: %bb.1
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch_around
# GCN: BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_BRANCH
# GCN: bb.2:
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch_around
body: |
bb.0:
successors: %bb.2
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
S_WAITCNT 0
bb.2:
$sgpr0 = S_MOV_B32 0
...
# GCN-LABEL: name: vmem_write_sgpr_branch_backedge
# GCN: $vgpr0 = IMPLICIT_DEF
# GCN-NEXT: V_NOP
# GCN-NEXT: S_MOV_B32
---
name: vmem_write_sgpr_branch_backedge
body: |
bb.0:
successors: %bb.1
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
$sgpr4 = IMPLICIT_DEF
$vgpr0 = IMPLICIT_DEF
$sgpr0 = S_MOV_B32 0
bb.1:
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.0
...