[AMDGPU] gfx1010 constant bus limit

Constant bus limit has increased to 2 with GFX10.

Differential Revision: https://reviews.llvm.org/D61404

llvm-svn: 359754
This commit is contained in:
Stanislav Mekhanoshin 2019-05-02 03:47:23 +00:00
parent b929a0062e
commit f2baae0abb
4 changed files with 136 additions and 24 deletions

View File

@ -256,6 +256,26 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
if (getGeneration() < GFX10)
return 1;
switch (Opcode) {
case AMDGPU::V_LSHLREV_B64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHL_B64:
case AMDGPU::V_LSHRREV_B64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHR_B64:
case AMDGPU::V_ASHRREV_I64:
case AMDGPU::V_ASHRREV_I64_gfx10:
case AMDGPU::V_ASHR_I64:
return 1;
}
return 2;
}
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)

View File

@ -443,6 +443,8 @@ public:
return MaxPrivateElementSize;
}
unsigned getConstantBusLimit(unsigned Opcode) const;
bool hasIntClamp() const {
return HasIntClamp;
}

View File

@ -2443,6 +2443,8 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned ConstantBusUseCount = 0;
unsigned NumLiterals = 0;
unsigned LiteralSize;
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
@ -2454,8 +2456,10 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
++ConstantBusUseCount;
}
SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
SGPRsUsed.insert(SGPRUsed);
++ConstantBusUseCount;
}
@ -2478,16 +2482,42 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
if (Reg != SGPRUsed) {
if (!SGPRsUsed.count(Reg)) {
SGPRsUsed.insert(Reg);
++ConstantBusUseCount;
}
SGPRUsed = Reg;
} else { // Expression or a literal
++ConstantBusUseCount;
if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
continue; // special operand like VINTERP attr_chan
// An instruction may use only one literal.
// This has been validated on the previous step.
// See validateVOP3Literal.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
// Otherwise it counts as 2 scalar values.
// See "GFX10 Shader Programming", section 3.6.2.3.
unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
if (Size < 4) Size = 4;
if (NumLiterals == 0) {
NumLiterals = 1;
LiteralSize = Size;
} else if (LiteralSize != Size) {
NumLiterals = 2;
}
}
}
}
}
ConstantBusUseCount += NumLiterals;
if (isGFX10())
return ConstantBusUseCount <= 2;
return ConstantBusUseCount <= 1;
}

View File

@ -2151,9 +2151,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
} else if ((RI.isPhysicalRegister(Src0->getReg()) &&
RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
(ST.getConstantBusLimit(Opc) <= 1 &&
RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
(RI.isVirtualRegister(Src0->getReg()) &&
RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
(ST.getConstantBusLimit(Opc) <= 1 &&
RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
// VGPR is okay as Src0 - fallthrough
}
@ -2350,7 +2352,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
(!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
(ST.getConstantBusLimit(Opc) > 1 ||
!Src0->isReg() ||
!RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
if (auto Imm = getFoldableImm(Src2)) {
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
@ -3090,9 +3094,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;
SmallVector<unsigned, 2> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRRead(MI);
if (SGPRUsed != AMDGPU::NoRegister)
if (SGPRUsed != AMDGPU::NoRegister) {
++ConstantBusCount;
SGPRsUsed.push_back(SGPRUsed);
}
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@ -3100,23 +3107,37 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(OpIdx);
if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
SGPRUsed = MO.getReg();
if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
return !RI.regsOverlap(SGPRUsed, SGPR);
})) {
++ConstantBusCount;
SGPRsUsed.push_back(SGPRUsed);
}
} else {
++ConstantBusCount;
++LiteralCount;
}
}
}
if (ConstantBusCount > 1) {
ErrInfo = "VOP* instruction uses the constant bus more than once";
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// v_writelane_b32 is an exception from constant bus restriction:
// vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
Opcode != AMDGPU::V_WRITELANE_B32) {
ErrInfo = "VOP* instruction violates constant bus restriction";
return false;
}
if (isVOP3(MI) && LiteralCount) {
ErrInfo = "VOP3 instruction uses literal";
return false;
if (LiteralCount && !ST.hasVOP3Literal()) {
ErrInfo = "VOP3 instruction uses literal";
return false;
}
if (LiteralCount > 1) {
ErrInfo = "VOP3 instruction uses more than one literal";
return false;
}
}
}
@ -3509,31 +3530,47 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetRegisterClass *DefinedRC =
OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
if (!MO)
MO = &MI.getOperand(OpIdx);
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
return false;
RegSubRegPair SGPRUsed;
SmallDenseSet<RegSubRegPair> SGPRsUsed;
if (MO->isReg())
SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg()));
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg()) {
if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
if (!SGPRsUsed.count(SGPR) &&
usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
return false;
if (--ConstantBusLimit <= 0)
return false;
SGPRsUsed.insert(SGPR);
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
return false;
if (--ConstantBusLimit <= 0)
return false;
} else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
if (!VOP3LiteralLimit--)
return false;
if (--ConstantBusLimit <= 0)
return false;
}
}
}
@ -3569,7 +3606,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
// disabled for the operand type for instructions because they will always
// violate the one constant bus use rule.
bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
if (HasImplicitSGPR) {
if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@ -3680,7 +3717,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
};
// Find the one SGPR operand we are allowed to use.
int ConstantBusLimit = ST.getConstantBusLimit(Opc);
int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
if (SGPRReg != AMDGPU::NoRegister) {
SGPRsUsed.insert(SGPRReg);
--ConstantBusLimit;
}
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
@ -3688,16 +3732,32 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
break;
MachineOperand &MO = MI.getOperand(Idx);
// We should never see a VOP3 instruction with an illegal immediate operand.
if (!MO.isReg())
if (!MO.isReg()) {
if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx]))
continue;
if (LiteralLimit > 0 && ConstantBusLimit > 0) {
--LiteralLimit;
--ConstantBusLimit;
continue;
}
--LiteralLimit;
--ConstantBusLimit;
legalizeOpWithMove(MI, Idx);
continue;
}
if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
continue; // VGPRs are legal
if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
SGPRReg = MO.getReg();
// We can use one SGPR in each VOP3 instruction.
// We can use one SGPR in each VOP3 instruction prior to GFX10
// and two starting from GFX10.
if (SGPRsUsed.count(MO.getReg()))
continue;
if (ConstantBusLimit > 0) {
SGPRsUsed.insert(MO.getReg());
--ConstantBusLimit;
continue;
}