From 95292bbfcd70cd52648b930827c76c8b6c86272f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 20 Jan 2015 17:49:47 +0000 Subject: [PATCH] R600/SI: Use external symbols for scratch buffer We were passing the scratch buffer address to the shaders via user sgprs, but now we use external symbols and have the driver patch the shader using reloc information. llvm-svn: 226586 --- llvm/lib/Target/R600/AMDGPU.h | 6 +- llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 21 ++++- llvm/lib/Target/R600/AMDGPUMCInstLower.cpp | 6 ++ llvm/lib/Target/R600/SIDefines.h | 1 + llvm/lib/Target/R600/SIInstrInfo.cpp | 4 +- llvm/lib/Target/R600/SIInstructions.td | 8 +- llvm/lib/Target/R600/SIPrepareScratchRegs.cpp | 92 ++++++++++--------- llvm/lib/Target/R600/SIRegisterInfo.cpp | 34 ++----- llvm/lib/Target/R600/SIRegisterInfo.h | 2 +- 9 files changed, 92 insertions(+), 82 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPU.h b/llvm/lib/Target/R600/AMDGPU.h index fcf9eca80e96..c6600550126e 100644 --- a/llvm/lib/Target/R600/AMDGPU.h +++ b/llvm/lib/Target/R600/AMDGPU.h @@ -77,7 +77,11 @@ extern Target TheGCNTarget; namespace AMDGPU { enum TargetIndex { - TI_CONSTDATA_START + TI_CONSTDATA_START, + TI_SCRATCH_RSRC_DWORD0, + TI_SCRATCH_RSRC_DWORD1, + TI_SCRATCH_RSRC_DWORD2, + TI_SCRATCH_RSRC_DWORD3 }; } diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 28b4183d277e..e0e81680cc08 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -962,16 +962,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, const SITargetLowering& Lowering = *static_cast(getTargetLowering()); - unsigned ScratchPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); unsigned ScratchOffsetReg = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, ScratchOffsetReg, MVT::i32); + SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); + SDValue ScratchRsrcDword0 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); - SDValue ScratchPtr = - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64); + SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); + SDValue ScratchRsrcDword1 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); + + const SDValue RsrcOps[] = { + CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), + ScratchRsrcDword0, + CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + ScratchRsrcDword1, + CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), + }; + SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::v2i32, RsrcOps), 0); Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); diff --git a/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp index 5d870d5e6613..03aa32d05ef0 100644 --- a/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -80,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateExpr(Expr); break; } + case MachineOperand::MO_ExternalSymbol: { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName())); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MCOp = MCOperand::CreateExpr(Expr); + break; + } } OutMI.addOperand(MCOp); } diff --git a/llvm/lib/Target/R600/SIDefines.h b/llvm/lib/Target/R600/SIDefines.h index 73a9c73d8e7b..1c74dda5362c 100644 --- a/llvm/lib/Target/R600/SIDefines.h +++ b/llvm/lib/Target/R600/SIDefines.h @@ -163,4 +163,5 @@ namespace SIOutMods { #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 #define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12) + #endif diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 8c2f324005aa..ccf90ddfae04 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -482,7 +482,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { LLVMContext &Ctx = MF->getFunction()->getContext(); @@ -528,7 +528,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index c09bed7840fc..bd680233e658 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1951,14 +1951,14 @@ multiclass SI_SPILL_SGPR { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 @@ -1974,14 +1974,14 @@ multiclass SI_SPILL_VGPR { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs vgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 diff --git a/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp b/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp index f0e7edec6b48..b7934bd9e919 100644 --- a/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace { @@ -84,28 +85,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) Entry->addLiveIn(ScratchOffsetPreloadReg); - // Load the scratch pointer - unsigned ScratchPtrReg = - TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass); - int ScratchPtrFI = -1; - - if (ScratchPtrReg != AMDGPU::NoRegister) { - // Found an SGPR to use. - MRI.setPhysRegUsed(ScratchPtrReg); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg) - .addReg(ScratchPtrPreloadReg); - } else { - // No SGPR is available, we must spill. - ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE)) - .addReg(ScratchPtrPreloadReg) - .addFrameIndex(ScratchPtrFI); - } - // Load the scratch offset. unsigned ScratchOffsetReg = TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); - int ScratchOffsetFI = ~0; + int ScratchOffsetFI = -1; if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use @@ -125,22 +108,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { // add them to all the SI_SPILL_V* instructions. RegScavenger RS; - bool UseRegScavenger = - (ScratchPtrReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister); + unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); + RS.addScavengingFrameIndex(ScratchRsrcFI); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; - if (UseRegScavenger) - RS.enterBasicBlock(&MBB); + // Add the scratch offset reg as a live-in so that the register scavenger + // doesn't re-use it. + if (!MBB.isLiveIn(ScratchOffsetReg)) + MBB.addLiveIn(ScratchOffsetReg); + RS.enterBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; + RS.forward(I); DebugLoc DL = MI.getDebugLoc(); switch(MI.getOpcode()) { - default: break;; + default: break; case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: @@ -153,18 +140,35 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: - // Scratch Pointer - if (ScratchPtrReg == AMDGPU::NoRegister) { - ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE), - ScratchPtrReg) - .addFrameIndex(ScratchPtrFI) - .addReg(AMDGPU::NoRegister) - .addReg(AMDGPU::NoRegister); - } else if (!MBB.isLiveIn(ScratchPtrReg)) { - MBB.addLiveIn(ScratchPtrReg); - } + // Scratch resource + unsigned ScratchRsrcReg = + RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size + + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) + .addImm(Rsrc & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) + .addImm(Rsrc >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + // Scratch Offset if (ScratchOffsetReg == AMDGPU::NoRegister) { ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), @@ -176,20 +180,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { MBB.addLiveIn(ScratchOffsetReg); } - if (ScratchPtrReg == AMDGPU::NoRegister || + if (ScratchRsrcReg == AMDGPU::NoRegister || ScratchOffsetReg == AMDGPU::NoRegister) { LLVMContext &Ctx = MF.getFunction()->getContext(); Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchPtrReg = AMDGPU::SGPR0; + ScratchRsrcReg = AMDGPU::SGPR0; ScratchOffsetReg = AMDGPU::SGPR0; } - MI.getOperand(2).setReg(ScratchPtrReg); + MI.getOperand(2).setReg(ScratchRsrcReg); + MI.getOperand(2).setIsKill(true); + MI.getOperand(2).setIsUndef(false); MI.getOperand(3).setReg(ScratchOffsetReg); + MI.getOperand(3).setIsUndef(false); + MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); + MI.dump(); break; } - if (UseRegScavenger) - RS.forward(); } } return true; diff --git a/llvm/lib/Target/R600/SIRegisterInfo.cpp b/llvm/lib/Target/R600/SIRegisterInfo.cpp index 59ff8a27d057..321d25fce730 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.cpp +++ b/llvm/lib/Target/R600/SIRegisterInfo.cpp @@ -98,7 +98,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const { @@ -113,34 +113,11 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, bool RanOutOfSGPRs = false; unsigned SOffset = ScratchOffset; - unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0); - if (RsrcReg == AMDGPU::NoRegister) { - RanOutOfSGPRs = true; - RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; - } - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned Size = NumSubRegs * 4; - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64), - getSubReg(RsrcReg, AMDGPU::sub0_sub1)) - .addReg(ScratchPtr) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub2)) - .addImm(Rsrc & 0xffffffff) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub3)) - .addImm(Rsrc >> 32) - .addReg(RsrcReg, RegState::ImplicitDefine); - if (!isUInt<12>(Offset + Size)) { + dbgs() << "Offset scavenge\n"; SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); if (SOffset == AMDGPU::NoRegister) { RanOutOfSGPRs = true; @@ -163,7 +140,7 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(IsLoad)) - .addReg(RsrcReg, getKillRegState(IsKill)) + .addReg(ScratchRsrcReg, getKillRegState(IsKill)) .addImm(Offset) .addReg(SOffset, getKillRegState(IsKill)) .addImm(0) // glc @@ -236,6 +213,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } if (isM0) { + dbgs() << "Scavenge M0\n"; SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); } @@ -262,7 +240,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V32_SAVE: buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); @@ -274,7 +252,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V512_RESTORE: { buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); diff --git a/llvm/lib/Target/R600/SIRegisterInfo.h b/llvm/lib/Target/R600/SIRegisterInfo.h index d14212c2b104..8aa02c30a431 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.h +++ b/llvm/lib/Target/R600/SIRegisterInfo.h @@ -111,7 +111,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, unsigned ScratchOffset, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const; };