R600/SI: Spill VGPRs to scratch space for compute shaders

llvm-svn: 225988
Tom Stellard 2015-01-14 15:42:31 +00:00
parent d657321aef
commit 42fb60e1a7
11 changed files with 351 additions and 94 deletions

lib/Target/R600/AMDGPU.h

@@ -47,6 +47,7 @@ FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
 FunctionPass *createSIFixSGPRLiveRangesPass();
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
 
 void initializeSIFoldOperandsPass(PassRegistry &);
 extern char &SIFoldOperandsID;

lib/Target/R600/AMDGPUTargetMachine.cpp

@@ -189,6 +189,7 @@ void AMDGPUPassConfig::addPostRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
   if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+    addPass(createSIPrepareScratchRegs(), false);
     addPass(createSIShrinkInstructionsPass(), false);
   }
 }

lib/Target/R600/CMakeLists.txt

@@ -51,6 +51,7 @@ add_llvm_target(R600CodeGen
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
+  SIPrepareScratchRegs.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SITypeRewriter.cpp

lib/Target/R600/SIInstrInfo.cpp

@@ -433,13 +433,9 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 
 static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const TargetMachine &TM = MF->getTarget();
 
-  // FIXME: Even though it can cause problems, we need to enable
-  // spilling at -O0, since the fast register allocator always
-  // spills registers that are live at the end of blocks.
-  return MFI->getShaderType() == ShaderType::COMPUTE &&
-         TM.getOptLevel() == CodeGenOpt::None;
+  // FIXME: Implement spilling for other shader types.
+  return MFI->getShaderType() == ShaderType::COMPUTE;
 }
@@ -450,6 +446,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
   MachineFunction *MF = MBB.getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
   int Opcode = -1;
@@ -466,6 +463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
     }
   } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    MFI->setHasSpilledVGPRs();
+
     switch(RC->getSize() * 8) {
       case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
       case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
@@ -480,7 +479,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode))
             .addReg(SrcReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -522,7 +525,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (Opcode != -1) {
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -553,7 +561,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
     MachineBasicBlock::iterator Insert = Entry.front();
     DebugLoc DL = Insert->getDebugLoc();
 
-    TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
     if (TIDReg == AMDGPU::NoRegister)
       return TIDReg;

lib/Target/R600/SIInstrInfo.td

@@ -1763,6 +1763,7 @@ multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass,
 
 multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
                                ValueType store_vt, SDPatternOperator st> {
+  let mayLoad = 0, mayStore = 1 in {
   let addr64 = 0 in {
     def "" : MUBUF_si <
@@ -1820,6 +1821,7 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
       let tfe = 0;
       let soffset = 128; // ZERO
     }
+  } // End mayLoad = 0, mayStore = 1
 }
 
 class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
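
Note: the explicit mayLoad = 0, mayStore = 1 flags matter for the new spill
lowering, because SIRegisterInfo::buildScratchLoadStore (later in this commit)
tells reloads apart from stores purely via TII->get(LoadStoreOp).mayLoad().
A minimal illustration, using the two opcodes this commit passes in:

    // True for the reload opcode, false for the store opcode.
    bool IsLoad  = TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET).mayLoad();
    bool IsStore = TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET).mayStore();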

lib/Target/R600/SIInstructions.td

@@ -1940,18 +1940,20 @@ def V_SUB_F64 : InstSI <
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
-  def _SAVE : InstSI <
-    (outs),
-    (ins sgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs sgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs sgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
@@ -1961,17 +1963,20 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
-  def _SAVE : InstSI <
-    (outs),
-    (ins vgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs vgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs vgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
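
Setting UseNamedOperandTable = 1 is what lets later passes find the new
$scratch_ptr and $scratch_offset operands by name instead of by position; the
SIRegisterInfo.cpp changes below rely on this via TII->getNamedOperand(). A
short sketch of the underlying lookup (illustrative, not part of this commit):

    // getNamedOperandIdx is generated by TableGen from the named-operand
    // table; it returns the operand's index for this opcode, or -1.
    int Idx = AMDGPU::getNamedOperandIdx(AMDGPU::SI_SPILL_V32_SAVE,
                                         AMDGPU::OpName::scratch_ptr);
    MachineOperand &ScratchPtr = MI->getOperand(Idx);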

lib/Target/R600/SIMachineFunctionInfo.cpp

@@ -29,6 +29,7 @@ void SIMachineFunctionInfo::anchor() {}
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
+    HasSpilledVGPRs(false),
     PSInputAddr(0),
     NumUserSGPRs(0),
     LDSWaveSpillSize(0) { }
@@ -50,7 +51,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
   struct SpilledReg Spill;
 
   if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
+    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
     MRI.setPhysRegUsed(LaneVGPR);

lib/Target/R600/SIMachineFunctionInfo.h

@@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   void anchor() override;
 
   unsigned TIDReg;
+  bool HasSpilledVGPRs;
 
 public:
@@ -52,6 +53,8 @@ public:
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
   unsigned getTIDReg() const { return TIDReg; };
   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+  bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+  void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
 
   unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };

lib/Target/R600/SIPrepareScratchRegs.cpp (new file)

@@ -0,0 +1,196 @@
//===-- SIPrepareScratchRegs.cpp - Prepare scratch regs for VGPR spilling -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This pass loads scratch pointer and scratch offset into a register or a
/// frame index which can be used anywhere in the program. These values will
/// be used for spilling VGPRs.
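///
/// SIInstrInfo::storeRegToStackSlot and loadRegFromStackSlot emit the
/// SI_SPILL_V* pseudos with undef place-holder operands; this pass rewrites
/// those operands to the real scratch pointer and scratch offset registers.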
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

namespace {

class SIPrepareScratchRegs : public MachineFunctionPass {

private:
  static char ID;

public:
  SIPrepareScratchRegs() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI prepare scratch registers";
  }
};

} // End anonymous namespace

char SIPrepareScratchRegs::ID = 0;

FunctionPass *llvm::createSIPrepareScratchRegs() {
  return new SIPrepareScratchRegs();
}

bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  MachineBasicBlock *Entry = MF.begin();
  MachineBasicBlock::iterator I = Entry->begin();
  DebugLoc DL = I->getDebugLoc();

  // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
  // run this pass even when no VGPRs have been spilled.
  if (!MFI->hasSpilledVGPRs())
    return false;

  unsigned ScratchPtrPreloadReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
  unsigned ScratchOffsetPreloadReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
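
  // The scratch pointer and wave offset arrive in preloaded SGPRs; mark them
  // live-in to the entry block before they are copied or spilled below.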
  if (!Entry->isLiveIn(ScratchPtrPreloadReg))
    Entry->addLiveIn(ScratchPtrPreloadReg);

  if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
    Entry->addLiveIn(ScratchOffsetPreloadReg);

  // Load the scratch pointer
  unsigned ScratchPtrReg =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
  int ScratchPtrFI = -1;

  if (ScratchPtrReg != AMDGPU::NoRegister) {
    // Found an SGPR to use.
    MRI.setPhysRegUsed(ScratchPtrReg);
    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
            .addReg(ScratchPtrPreloadReg);
  } else {
    // No SGPR is available, we must spill.
    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
            .addReg(ScratchPtrPreloadReg)
            .addFrameIndex(ScratchPtrFI);
  }

  // Load the scratch offset.
  unsigned ScratchOffsetReg =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
  int ScratchOffsetFI = -1;

  if (ScratchOffsetReg != AMDGPU::NoRegister) {
    // Found an SGPR to use
    MRI.setPhysRegUsed(ScratchOffsetReg);
    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
            .addReg(ScratchOffsetPreloadReg);
  } else {
    // No SGPR is available, we must spill.
    ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
            .addReg(ScratchOffsetPreloadReg)
            .addFrameIndex(ScratchOffsetFI);
  }

  // Now that we have the scratch pointer and offset values, we need to
  // add them to all the SI_SPILL_V* instructions.
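  // If either value had to be spilled to a stack slot above, scavenge a
  // temporary SGPR at each spill site and reload the value there; otherwise
  // the chosen registers only need to be marked live-in to each block.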
  RegScavenger RS;
  bool UseRegScavenger =
      (ScratchPtrReg == AMDGPU::NoRegister ||
       ScratchOffsetReg == AMDGPU::NoRegister);
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    if (UseRegScavenger)
      RS.enterBasicBlock(&MBB);

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;
      DebugLoc DL = MI.getDebugLoc();
      switch(MI.getOpcode()) {
        default: break;
        case AMDGPU::SI_SPILL_V512_SAVE:
        case AMDGPU::SI_SPILL_V256_SAVE:
        case AMDGPU::SI_SPILL_V128_SAVE:
        case AMDGPU::SI_SPILL_V96_SAVE:
        case AMDGPU::SI_SPILL_V64_SAVE:
        case AMDGPU::SI_SPILL_V32_SAVE:
        case AMDGPU::SI_SPILL_V32_RESTORE:
        case AMDGPU::SI_SPILL_V64_RESTORE:
        case AMDGPU::SI_SPILL_V128_RESTORE:
        case AMDGPU::SI_SPILL_V256_RESTORE:
        case AMDGPU::SI_SPILL_V512_RESTORE:

          // Scratch Pointer
          if (ScratchPtrReg == AMDGPU::NoRegister) {
            ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
                    ScratchPtrReg)
                    .addFrameIndex(ScratchPtrFI)
                    .addReg(AMDGPU::NoRegister)
                    .addReg(AMDGPU::NoRegister);
          } else if (!MBB.isLiveIn(ScratchPtrReg)) {
            MBB.addLiveIn(ScratchPtrReg);
          }

          if (ScratchOffsetReg == AMDGPU::NoRegister) {
            ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
                    ScratchOffsetReg)
                    .addFrameIndex(ScratchOffsetFI)
                    .addReg(AMDGPU::NoRegister)
                    .addReg(AMDGPU::NoRegister);
          } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
            MBB.addLiveIn(ScratchOffsetReg);
          }

          if (ScratchPtrReg == AMDGPU::NoRegister ||
              ScratchOffsetReg == AMDGPU::NoRegister) {
            LLVMContext &Ctx = MF.getFunction()->getContext();
            Ctx.emitError("ran out of SGPRs for spilling VGPRs");
            ScratchPtrReg = AMDGPU::SGPR0;
            ScratchOffsetReg = AMDGPU::SGPR0;
          }
          MI.getOperand(2).setReg(ScratchPtrReg);
          MI.getOperand(3).setReg(ScratchOffsetReg);

          break;
      }
      if (UseRegScavenger)
        RS.forward();
    }
  }
  return true;
}

lib/Target/R600/SIRegisterInfo.cpp

@@ -23,6 +23,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
 SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -94,6 +95,84 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  }
}

void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchPtr,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MI->getParent()->getParent();
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  unsigned SOffset = ScratchOffset;

  unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
  if (RsrcReg == AMDGPU::NoRegister) {
    RanOutOfSGPRs = true;
    RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  }

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;
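
  // Build a 128-bit scratch buffer resource descriptor: dwords 0-1 hold the
  // scratch base pointer, the low half of Rsrc below is the (unbounded)
  // size, and the high half carries the data format plus the TID-enable bit
  // so each lane's access is offset by its thread ID.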
  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
                  0xffffffff; // Size

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
          getSubReg(RsrcReg, AMDGPU::sub0_sub1))
          .addReg(ScratchPtr)
          .addReg(RsrcReg, RegState::ImplicitDefine);

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
          getSubReg(RsrcReg, AMDGPU::sub2))
          .addImm(Rsrc & 0xffffffff)
          .addReg(RsrcReg, RegState::ImplicitDefine);

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
          getSubReg(RsrcReg, AMDGPU::sub3))
          .addImm(Rsrc >> 32)
          .addReg(RsrcReg, RegState::ImplicitDefine);
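
  // The MUBUF offset field is a 12-bit unsigned immediate; if the spill
  // offset does not fit, fold it into the soffset register instead.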
  if (!isUInt<12>(Offset + Size)) {
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
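
  // Emit one dword-wide buffer load/store per 32-bit sub-register; the
  // trailing implicit operand keeps the full register live (or marks it
  // defined, for loads) across the whole sequence.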
  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
        Value;
    bool IsKill = (i == e - 1);

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
            .addReg(SubReg, getDefRegState(IsLoad))
            .addReg(RsrcReg, getKillRegState(IsKill))
            .addImm(Offset)
            .addReg(SOffset, getKillRegState(IsKill))
            .addImm(0) // glc
            .addImm(0) // slc
            .addImm(0) // tfe
            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
@@ -162,7 +241,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
                 .addReg(Spill.VGPR)
-                .addImm(Spill.Lane);
+                .addImm(Spill.Lane)
+                .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
 
         if (isM0) {
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
             .addReg(SubReg);
@@ -179,71 +259,24 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
-    case AMDGPU::SI_SPILL_V32_SAVE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned SrcReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
-            SrcReg;
-        Offset += (i * 4);
-        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
-
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-        if (AddrReg == AMDGPU::NoRegister) {
-          LLVMContext &Ctx = MF->getFunction()->getContext();
-          Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
-          AddrReg = AMDGPU::VGPR0;
-        }
-
-        // Store the value in LDS
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addReg(SubReg) // data0
-                .addImm(0); // offset
-      }
+    case AMDGPU::SI_SPILL_V32_SAVE:
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
-    }
     case AMDGPU::SI_SPILL_V32_RESTORE:
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned DstReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
-            DstReg;
-        Offset += (i * 4);
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-        if (AddrReg == AMDGPU::NoRegister) {
-          LLVMContext &Ctx = MF->getFunction()->getContext();
-          Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
-          AddrReg = AMDGPU::VGPR0;
-        }
-
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addImm(0); //offset
-      }
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
     }
@@ -431,9 +464,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 
 /// \brief Returns a register that is not used at any point in the function.
 ///        If all registers are used, then this function will return
 //         AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
-
-  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+                                            const TargetRegisterClass *RC) const {
 
   for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
        I != E; ++I) {
lib/Target/R600/SIRegisterInfo.h

@@ -105,7 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;
 
-  unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
+  unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+                              const TargetRegisterClass *RC) const;
+
+private:
+  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                             unsigned LoadStoreOp, unsigned Value,
+                             unsigned ScratchPtr, unsigned ScratchOffset,
+                             int64_t Offset, RegScavenger *RS) const;
 };
 
 } // End namespace llvm