From a41351e37c72c86c0547a74166712d023dd076b0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 17 Nov 2017 21:35:32 +0000 Subject: [PATCH] AMDGPU: Move hazard avoidance out of waitcnt pass. This is mostly moving VMEM clause breaking into the hazard recognizer. Also move another hazard currently handled in the waitcnt pass. Also stops breaking clauses unless xnack is enabled. llvm-svn: 318557 --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 6 +- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 91 +-- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 2 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 54 -- .../AMDGPU/break-vmem-soft-clauses.mir | 580 ++++++++++++++++++ .../CodeGen/AMDGPU/inserted-wait-states.mir | 8 +- .../test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir | 49 ++ 7 files changed, 693 insertions(+), 97 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir create mode 100644 llvm/test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index f9b400cfe1b9..63634f434fa6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -806,10 +806,14 @@ public: return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasReadM0Hazard() const { + bool hasReadM0MovRelInterpHazard() const { return getGeneration() >= AMDGPUSubtarget::GFX9; } + bool hasReadM0SendMsgHazard() const { + return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; + } + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 42bd2023c8cb..be0588b45e30 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -87,6 +87,18 @@ static bool isSMovRel(unsigned Opcode) { } } +static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) { + 
switch (MI.getOpcode()) { + case AMDGPU::S_SENDMSG: + case AMDGPU::S_SENDMSGHALT: + case AMDGPU::S_TTRACEDATA: + return true; + default: + // TODO: GDS + return false; + } +} + static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, AMDGPU::OpName::simm16); @@ -100,7 +112,10 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) return NoopHazard; - if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) + // FIXME: Should flat be considered vmem? + if ((SIInstrInfo::isVMEM(*MI) || + SIInstrInfo::isFLAT(*MI)) + && checkVMEMHazards(MI) > 0) return NoopHazard; if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) @@ -124,7 +139,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0) return NoopHazard; - if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && + if (ST.hasReadM0MovRelInterpHazard() && + (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) && + checkReadM0Hazards(MI) > 0) + return NoopHazard; + + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) && checkReadM0Hazards(MI) > 0) return NoopHazard; @@ -144,26 +164,20 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (SIInstrInfo::isSMRD(*MI)) return std::max(WaitStates, checkSMRDHazards(MI)); - if (SIInstrInfo::isVALU(*MI)) { - WaitStates = std::max(WaitStates, checkVALUHazards(MI)); + if (SIInstrInfo::isVALU(*MI)) + WaitStates = std::max(WaitStates, checkVALUHazards(MI)); - if (SIInstrInfo::isVMEM(*MI)) - WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); + if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) + WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); - if (SIInstrInfo::isDPP(*MI)) - WaitStates = std::max(WaitStates, checkDPPHazards(MI)); + if (SIInstrInfo::isDPP(*MI)) + WaitStates = std::max(WaitStates, checkDPPHazards(MI)); - if 
(isDivFMas(MI->getOpcode())) - WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); + if (isDivFMas(MI->getOpcode())) + WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); - if (isRWLane(MI->getOpcode())) - WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); - - if (TII.isVINTRP(*MI)) - WaitStates = std::max(WaitStates, checkReadM0Hazards(MI)); - - return WaitStates; - } + if (isRWLane(MI->getOpcode())) + WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); if (isSGetReg(MI->getOpcode())) return std::max(WaitStates, checkGetRegHazards(MI)); @@ -174,7 +188,11 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (isRFE(MI->getOpcode())) return std::max(WaitStates, checkRFEHazards(MI)); - if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) + if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) || + isSMovRel(MI->getOpcode()))) + return std::max(WaitStates, checkReadM0Hazards(MI)); + + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI)) return std::max(WaitStates, checkReadM0Hazards(MI)); return WaitStates; @@ -282,12 +300,14 @@ void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { addRegsToSet(TRI, MI.uses(), ClauseUses); } -int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { +int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { // SMEM soft clause are only present on VI+, and only matter if xnack is // enabled. if (!ST.isXNACKEnabled()) return 0; + bool IsSMRD = TII.isSMRD(*MEM); + resetClause(); // A soft-clause is any group of consecutive SMEM instructions. The @@ -303,7 +323,10 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { for (MachineInstr *MI : EmittedInstrs) { // When we hit a non-SMEM instruction then we have passed the start of the // clause and we can stop. 
- if (!MI || !SIInstrInfo::isSMRD(*MI)) + if (!MI) + break; + + if (IsSMRD != SIInstrInfo::isSMRD(*MI)) break; addClauseInst(*MI); @@ -312,13 +335,13 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { if (ClauseDefs.none()) return 0; - // FIXME: When we support stores, we need to make sure not to put loads and - // stores in the same clause if they use the same address. For now, just - // start a new clause whenever we see a store. - if (SMEM->mayStore()) + // We need to make sure not to put loads and stores in the same clause if they + // use the same address. For now, just start a new clause whenever we see a + // store. + if (MEM->mayStore()) return 1; - addClauseInst(*SMEM); + addClauseInst(*MEM); // If the set of defs and uses intersect then we cannot add this instruction // to the clause, so we have a hazard. @@ -329,7 +352,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); int WaitStatesNeeded = 0; - WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); + WaitStatesNeeded = checkSoftClauseHazards(SMRD); // This SMRD hazard only affects SI. if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) @@ -369,18 +392,15 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { } int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { - const SIInstrInfo *TII = ST.getInstrInfo(); - if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) return 0; - const SIRegisterInfo &TRI = TII->getRegisterInfo(); + int WaitStatesNeeded = checkSoftClauseHazards(VMEM); // A read of an SGPR by a VMEM instruction requires 5 wait states when the // SGPR was written by a VALU Instruction.
- int VmemSgprWaitStates = 5; - int WaitStatesNeeded = 0; - auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; + const int VmemSgprWaitStates = 5; + auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); }; for (const MachineOperand &Use : VMEM->uses()) { if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) @@ -598,11 +618,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) { } int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { - if (!ST.hasReadM0Hazard()) - return 0; - const SIInstrInfo *TII = ST.getInstrInfo(); - int SMovRelWaitStates = 1; + const int SMovRelWaitStates = 1; auto IsHazardFn = [TII] (MachineInstr *MI) { return TII->isSALU(*MI); }; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index eb382cc8c77b..01682acfac41 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -58,7 +58,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { [](MachineInstr *) { return true; }); int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard); - int checkSMEMSoftClauseHazards(MachineInstr *SMEM); + int checkSoftClauseHazards(MachineInstr *SMEM); int checkSMRDHazards(MachineInstr *SMRD); int checkVMEMHazards(MachineInstr* VMEM); int checkDPPHazards(MachineInstr *DPP); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c41757d58259..2d41d8965b15 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1522,8 +1522,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, ScoreBrackets->dump(); }); - bool InsertNOP = false; - // Walk over the instructions.
for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end(); Iter != E;) { @@ -1624,58 +1622,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, VCCZBugHandledSet.insert(&Inst); } - if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { - - // This avoids a s_nop after a waitcnt has just been inserted. - if (!SWaitInst && InsertNOP) { - BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); - } - InsertNOP = false; - - // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM - // or SMEM clause, respectively. - // - // The temporary workaround is to break the clauses with S_NOP. - // - // The proper solution would be to allocate registers such that all source - // and destination registers don't overlap, e.g. this is illegal: - // r0 = load r2 - // r2 = load r0 - bool IsSMEM = false; - bool IsVMEM = false; - if (TII->isSMRD(Inst)) - IsSMEM = true; - else if (TII->usesVM_CNT(Inst)) - IsVMEM = true; - - ++Iter; - if (Iter == E) - break; - - MachineInstr &Next = *Iter; - - // TODO: How about consecutive SMEM instructions? - // The comments above says break the clause but the code does not. - // if ((TII->isSMRD(next) && isSMEM) || - if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM && - // TODO: Enable this check when hasSoftClause is upstreamed. - // ST->hasSoftClauses() && - ST->isXNACKEnabled()) { - // Insert a NOP to break the clause. - InsertNOP = true; - continue; - } - - // There must be "S_NOP 0" between an instruction writing M0 and - // S_SENDMSG. 
- if ((Next.getOpcode() == AMDGPU::S_SENDMSG || - Next.getOpcode() == AMDGPU::S_SENDMSGHALT) && - Inst.definesRegister(AMDGPU::M0)) - InsertNOP = true; - - continue; - } - ++Iter; } diff --git a/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir new file mode 100644 index 000000000000..92145d319b19 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -0,0 +1,580 @@ +# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s +--- +# Trivial clause at beginning of program +name: trivial_clause_load_flat4_x1 + +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_flat4_x1 + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# Trivial clause at beginning of program +name: trivial_clause_load_flat4_x2 + +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_flat4_x2 + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr1 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- +# Trivial clause at beginning of program +name: trivial_clause_load_flat4_x3 + +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_flat4_x3 + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr1 = FLAT_LOAD_DWORD %vgpr5_vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = FLAT_LOAD_DWORD %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# Trivial clause at beginning of program +name: trivial_clause_load_flat4_x4 + +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_flat4_x4 + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr1 = FLAT_LOAD_DWORD %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = FLAT_LOAD_DWORD %vgpr8_vgpr9, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = FLAT_LOAD_DWORD %vgpr10_vgpr11, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- +# Reuse of same input pointer is OK + +name: trivial_clause_load_flat4_x2_sameptr +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# 32-bit load partially clobbers its own ptr reg +name: flat_load4_overwrite_ptr_lo + +body: | + bb.0: + ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# 32-bit load partially clobbers its own ptr reg +name: flat_load4_overwrite_ptr_hi + +body: | + bb.0: + ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi + ; GCN: %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr1 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# 64-bit load clobbers its own ptr reg +name: flat_load8_overwrite_ptr + +body: | + bb.0: + ; GCN-LABEL: name: flat_load8_overwrite_ptr + ; GCN: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt +# breaks the clause. 
+ + +name: break_clause_at_max_clause_size_flat_load4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4 + ; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit 
%vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18 + ; GCN-NEXT: S_ENDPGM + + %vgpr2 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr4 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr5 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + + %vgpr6 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr7 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr8 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr9 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + + %vgpr10 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr11 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr12 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr13 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + + %vgpr14 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr15 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr16 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr17 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %sgpr0 = S_MOV_B32 %sgpr0, implicit %vgpr2, implicit %vgpr3, implicit %vgpr4, implicit %vgpr5, implicit %vgpr6, implicit %vgpr7, implicit %vgpr8, implicit %vgpr9, implicit %vgpr10, implicit %vgpr11, implicit %vgpr12, implicit %vgpr13, implicit %vgpr14, implicit %vgpr15, implicit %vgpr16, implicit %vgpr17, implicit %vgpr18 + S_ENDPGM +... 
+--- + +name: break_clause_simple_load_flat4_lo_ptr + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- + +name: break_clause_simple_load_flat4_hi_ptr + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- + +name: break_clause_simple_load_flat8_ptr + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr + ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- + + +name: break_clause_simple_load_flat16_ptr + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr + ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- + +# The clause is broken by the waitcnt inserted at the end of the +# block, so no nop is needed. + + +name: break_clause_block_boundary_load_flat8_ptr + +body: | + ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN: bb.1: + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + bb.0: + %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + + bb.1: + %vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# The load clobbers the pointer of the store, so it needs to break. + +name: break_clause_store_load_into_ptr_flat4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4 + ; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- +# The load clobbers the data of the store, so it needs to break. +# FIXME: Would it be better to s_nop and wait later? + +name: break_clause_store_load_into_data_flat4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_store_load_into_data_flat4 + ; GCN: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr0 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# Regular VALU instruction breaks clause, no nop needed + +name: valu_inst_breaks_clause + +body: | + bb.0: + ; GCN-LABEL: name: valu_inst_breaks_clause + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr8 = V_MOV_B32_e32 0, implicit %exec + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# Regular SALU instruction breaks clause, no nop needed + +name: salu_inst_breaks_clause + +body: | + bb.0: + ; GCN-LABEL: name: salu_inst_breaks_clause + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %sgpr8 = S_MOV_B32 0 + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %sgpr8 = S_MOV_B32 0 + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- + +name: ds_inst_breaks_clause + +body: | + bb.0: + ; GCN-LABEL: name: ds_inst_breaks_clause + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- + +name: smrd_inst_breaks_clause + +body: | + bb.0: + ; GCN-LABEL: name: smrd_inst_breaks_clause + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0 + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %sgpr8 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 0, 0 + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# FIXME: Should this be handled? +name: implicit_use_breaks_clause + +body: | + bb.0: + ; GCN-LABEL: name: implicit_use_breaks_clause + ; GCN: %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5 + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr, implicit %vgpr4_vgpr5 + %vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 %vgpr6_vgpr7, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- +name: trivial_clause_load_mubuf4_x2 + +body: | + bb.0: + ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2 + ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + + %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr3 = BUFFER_LOAD_DWORD_OFFEN %vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + S_ENDPGM +... +--- +name: break_clause_simple_load_mubuf_offen_ptr + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr + ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + + %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + S_ENDPGM +... +--- +# BUFFER instructions overwriting their own inputs is supposedly OK. + +name: mubuf_load4_overwrite_ptr + +body: | + bb.0: + ; GCN-LABEL: name: mubuf_load4_overwrite_ptr + ; GCN: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: %vgpr1 = V_MOV_B32_e32 0, implicit %exec + ; GCN-NEXT: %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec + ; GCN-NEXT: S_ENDPGM + %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr1 = V_MOV_B32_e32 0, implicit %exec + %vgpr2 = V_MOV_B32_e32 %vgpr0, implicit %exec + S_ENDPGM +... 
+--- +# Break a clause from interference between mubuf and flat instructions + +name: break_clause_flat_load_mubuf_load + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_flat_load_mubuf_load + ; GCN: %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + + %vgpr0 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + S_ENDPGM +... +# Break a clause from interference between mubuf and flat instructions + +# GCN-LABEL: name: break_clause_mubuf_load_flat_load +# GCN: bb.0: +# GCN-NEXT: %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4 +# XNACK-NEXT: S_NOP 0 +# GCN-NEXT: %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3 +# GCN-NEXT: S_ENDPGM +name: break_clause_mubuf_load_flat_load + +body: | + bb.0: + %vgpr0 = BUFFER_LOAD_DWORD_OFFEN %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr1 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + + S_ENDPGM +... +--- + +name: break_clause_atomic_rtn_into_ptr_flat4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4 + ; GCN: %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + %vgpr2 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr4 = FLAT_ATOMIC_ADD_RTN %vgpr5_vgpr6, %vgpr7, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... 
+--- +name: break_clause_atomic_nortn_ptr_load_flat4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4 + ; GCN: FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: S_ENDPGM + + FLAT_ATOMIC_ADD %vgpr0_vgpr1, %vgpr2, 0, 0, implicit %exec, implicit %flat_scr + %vgpr2 = FLAT_LOAD_DWORD %vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- + +name: break_clause_atomic_rtn_into_ptr_mubuf4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4 + ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + + %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN %vgpr2, %vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec + S_ENDPGM +... +--- + +name: break_clause_atomic_nortn_ptr_load_mubuf4 + +body: | + bb.0: + ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4 + ; GCN: BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec + ; GCN-NEXT: %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + + BUFFER_ATOMIC_ADD_OFFEN %vgpr0, %vgpr1, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, implicit %exec + %vgpr1 = BUFFER_LOAD_DWORD_OFFEN %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + S_ENDPGM +... +--- +# Make sure there is no assert on mubuf instructions which do not have +# vaddr, and don't add register to track. 
+name: no_break_clause_mubuf_load_novaddr + +body: | + bb.0: + ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr + ; GCN: %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + ; GCN-NEXT: S_ENDPGM + %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr3 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + S_ENDPGM +... +--- +# Loads and stores using different addresses theoretically do not +# need a nop, although the XNACK check below still expects one. +name: mix_load_store_clause +body: | + bb.0: + ; GCN-LABEL: name: mix_load_store_clause + ; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + + FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr2_vgpr3, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... +--- +# Loads and stores using the same address need a nop.
+ +name: mix_load_store_clause_same_address +body: | + bb.0: + ; GCN-LABEL: name: mix_load_store_clause_same_address + ; GCN: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + ; XNACK-NEXT: S_NOP 0 + ; GCN-NEXT: FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + ; GCN-NEXT: %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + + FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr10 = FLAT_LOAD_DWORD %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr0_vgpr1, %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr11 = FLAT_LOAD_DWORD %vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM +... diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir index 15006e5fdca2..16d9070849b9 100644 --- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -437,22 +437,22 @@ body: | # GCN-LABEL: bb.0: # GCN: S_MOV_B32 -# GFX9: S_NOP +# GFX9-NEXT: S_NOP # GCN-NEXT: V_INTERP_P1_F32 # GCN-LABEL: bb.1: # GCN: S_MOV_B32 -# GFX9: S_NOP +# GFX9-NEXT: S_NOP # GCN-NEXT: V_INTERP_P2_F32 # GCN-LABEL: bb.2: # GCN: S_MOV_B32 -# GFX9: S_NOP +# GFX9-NEXT: S_NOP # GCN-NEXT: V_INTERP_P1_F32_16bank # GCN-LABEL: bb.3: # GCN: S_MOV_B32 -# GFX9: S_NOP +# GFX9-NEXT: S_NOP # GCN-NEXT: V_INTERP_MOV_F32 name: v_interp diff --git a/llvm/test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir b/llvm/test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir new file mode 100644 index 000000000000..5dfd5aa384fd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir @@ -0,0 +1,49 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s +# RUN: llc 
-march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,VI %s +# RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,CI %s +# RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,SI %s + +--- +name: m0_sendmsg +body: | + ; GCN-LABEL: name: m0_sendmsg + ; GCN: %m0 = S_MOV_B32 -1 + ; VI-NEXT: S_NOP 0 + ; GFX9-NEXT: S_NOP 0 + ; GCN-NEXT: S_SENDMSG 3, implicit %exec, implicit %m0 + + bb.0: + %m0 = S_MOV_B32 -1 + S_SENDMSG 3, implicit %exec, implicit %m0 + S_ENDPGM +... +--- + +name: m0_sendmsghalt +body: | + ; GCN-LABEL: name: m0_sendmsghalt + ; GCN: %m0 = S_MOV_B32 -1 + ; VI-NEXT: S_NOP 0 + ; GFX9-NEXT: S_NOP 0 + ; GCN-NEXT: S_SENDMSGHALT 3, implicit %exec, implicit %m0 + + bb.0: + %m0 = S_MOV_B32 -1 + S_SENDMSGHALT 3, implicit %exec, implicit %m0 + S_ENDPGM +... +--- + +name: m0_ttracedata +body: | + ; GCN-LABEL: name: m0_ttracedata + ; GCN: %m0 = S_MOV_B32 -1 + ; VI-NEXT: S_NOP 0 + ; GFX9-NEXT: S_NOP 0 + ; GCN-NEXT: S_TTRACEDATA implicit %m0 + + bb.0: + %m0 = S_MOV_B32 -1 + S_TTRACEDATA implicit %m0 + S_ENDPGM +...