AMDGPU: Fix getInstSizeInBytes

Summary:
Add some optional code to validate getInstSizeInBytes for emitted
instructions. This flushed out some issues which are fixed by this
patch:

- Streamline getInstSizeInBytes
- Properly define the VI readlane/writelane instruction as VOP3
- Fix the inline constant determination. Specifically, this change
  fixes an issue where a 32-bit value of 0xffffffff was recorded
  as unsigned. This is equal to -1 when restricting to a 32-bit
  comparison, and an inline constant can be used.

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D50629

Change-Id: Id87c3b7975839da0de8156a124b0ce98c5fb47f2
llvm-svn: 340903
This commit is contained in:
Nicolai Haehnle 2018-08-29 07:46:09 +00:00
parent 025bb56a86
commit 283b995097
5 changed files with 46 additions and 35 deletions

View File

@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
#ifdef EXPENSIVE_CHECKS
// Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
// work correctly for the generic CPU).
//
// The isPseudo check really shouldn't be here, but unfortunately there are
// some negative lit tests that depend on being able to continue through
// here even when pseudo instructions haven't been lowered.
if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) {
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
raw_svector_ostream CodeStream(CodeBytes);
std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
*STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext));
InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
}
#endif
if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);

View File

@ -2398,8 +2398,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
return Trunc == Imm &&
AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@ -4975,12 +4974,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
//
// FIXME: Instructions that have a base 32-bit encoding report their size as
// 4, even though they are really 8 bytes if they have a literal operand.
if (DescSize != 0 && DescSize != 4)
return DescSize;
if (isFixedSize(MI))
return DescSize;
@ -4989,24 +4982,28 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isVALU(MI) || isSALU(MI)) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return 4; // No operands.
return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
return 8;
return DescSize + 4;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return 4;
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
return 8;
return DescSize + 4;
return 4;
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx == -1)
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
return DescSize + 4;
return DescSize;
}
if (DescSize == 4)
return 4;
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@ -5021,7 +5018,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
default:
llvm_unreachable("unable to find instruction size");
return DescSize;
}
}

View File

@ -716,12 +716,6 @@ class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP32_Real_vi <bits<10> op> {
def _vi :
VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
@ -899,9 +893,6 @@ defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;

View File

@ -651,23 +651,23 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP3_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
@ -813,6 +813,9 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0