AMDGPU: Rename isAmdCodeObjectV2 -> isAmdHsaOrMesa
isAmdCodeObjectV2 is a misleading name: it does not check the code-object version, but whether the target OS is amdhsa or mesa. Also add a test to make sure we do not generate the old kernel header for code object v3. Differential Revision: https://reviews.llvm.org/D52897 llvm-svn: 343813
This commit is contained in:
parent
bbe5d55fea
commit
aa067cb9fb
|
@ -193,13 +193,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
|||
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
|
||||
if (!MFI.isEntryFunction())
|
||||
return;
|
||||
if (IsaInfo::hasCodeObjectV3(getSTI()) &&
|
||||
TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
|
||||
const Function &F = MF->getFunction();
|
||||
if (STM.isAmdCodeObjectV2(F) &&
|
||||
if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
|
||||
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
|
||||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
|
||||
amd_kernel_code_t KernelCode;
|
||||
|
@ -210,7 +207,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
|||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
|
||||
if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS())
|
||||
HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
|
||||
|
@ -259,7 +257,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
|||
|
||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
|
||||
if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
|
||||
if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
|
||||
SmallString<128> SymbolName;
|
||||
getNameWithPrefix(SymbolName, &MF->getFunction()),
|
||||
getTargetStreamer()->EmitAMDGPUSymbolType(
|
||||
|
|
|
@ -135,7 +135,7 @@ public:
|
|||
return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
|
||||
}
|
||||
|
||||
bool isAmdCodeObjectV2(const Function &F) const {
|
||||
bool isAmdHsaOrMesa(const Function &F) const {
|
||||
return isAmdHsaOS() || isMesaKernel(F);
|
||||
}
|
||||
|
||||
|
@ -202,7 +202,7 @@ public:
|
|||
/// Returns the offset in bytes from the start of the input buffer
|
||||
/// of the first explicit kernel argument.
|
||||
unsigned getExplicitKernelArgOffset(const Function &F) const {
|
||||
return isAmdCodeObjectV2(F) ? 0 : 36;
|
||||
return isAmdHsaOrMesa(F) ? 0 : 36;
|
||||
}
|
||||
|
||||
/// \returns Maximum number of work groups per compute unit supported by the
|
||||
|
|
|
@ -289,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
|
||||
|
||||
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
|
||||
if (ST.isAmdCodeObjectV2(F)) {
|
||||
if (ST.isAmdHsaOrMesa(F)) {
|
||||
PreloadedPrivateBufferReg = MFI->getPreloadedReg(
|
||||
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
|
||||
}
|
||||
|
@ -308,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
|
||||
assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
|
||||
assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
|
||||
MRI.addLiveIn(PreloadedPrivateBufferReg);
|
||||
MBB.addLiveIn(PreloadedPrivateBufferReg);
|
||||
}
|
||||
|
@ -333,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
|
|||
|
||||
bool CopyBuffer = ResourceRegUsed &&
|
||||
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
|
||||
ST.isAmdCodeObjectV2(F) &&
|
||||
ST.isAmdHsaOrMesa(F) &&
|
||||
ScratchRsrcReg != PreloadedPrivateBufferReg;
|
||||
|
||||
// This needs to be careful of the copying order to avoid overwriting one of
|
||||
|
@ -433,7 +433,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
|
|||
}
|
||||
if (ST.isMesaGfxShader(Fn)
|
||||
|| (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
|
||||
assert(!ST.isAmdCodeObjectV2(Fn));
|
||||
assert(!ST.isAmdHsaOrMesa(Fn));
|
||||
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
|
||||
|
||||
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
||||
|
|
|
@ -1653,7 +1653,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
|
|||
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
|
||||
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
if (ST.isAmdCodeObjectV2(MF.getFunction())) {
|
||||
if (ST.isAmdHsaOrMesa(MF.getFunction())) {
|
||||
if (RequiresStackAccess) {
|
||||
// If we have stack objects, we unquestionably need the private buffer
|
||||
// resource. For the Code Object V2 ABI, this will be the first 4 user
|
||||
|
@ -4809,14 +4809,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_implicit_buffer_ptr: {
|
||||
if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
|
||||
if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
return getPreloadedValue(DAG, *MFI, VT,
|
||||
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
|
||||
}
|
||||
case Intrinsic::amdgcn_dispatch_ptr:
|
||||
case Intrinsic::amdgcn_queue_ptr: {
|
||||
if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
|
||||
if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
|
||||
DiagnosticInfoUnsupported BadIntrin(
|
||||
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
|
||||
DL.getDebugLoc());
|
||||
|
|
|
@ -137,8 +137,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
|||
}
|
||||
}
|
||||
|
||||
bool IsCOV2 = ST.isAmdCodeObjectV2(F);
|
||||
if (IsCOV2) {
|
||||
bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
|
||||
if (isAmdHsaOrMesa) {
|
||||
if (HasStackObjects || MaySpill)
|
||||
PrivateSegmentBuffer = true;
|
||||
|
||||
|
@ -158,7 +158,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
|||
if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
|
||||
KernargSegmentPtr = true;
|
||||
|
||||
if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
|
||||
if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
|
||||
// TODO: This could be refined a lot. The attribute is a poor way of
|
||||
// detecting calls that may require it before argument lowering.
|
||||
if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | llvm-readobj -elf-output-style=GNU -notes -relocations -sections -symbols | FileCheck --check-prefixes=ALL-ELF,OSABI-AMDHSA-ELF %s
|
||||
|
||||
; ALL-ASM-LABEL: {{^}}fadd:
|
||||
|
||||
; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
|
||||
; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
|
||||
|
||||
; OSABI-AMDHSA-ASM: s_endpgm
|
||||
; OSABI-AMDHSA-ASM: .section .rodata,#alloc
|
||||
; OSABI-AMDHSA-ASM: .p2align 6
|
||||
|
@ -16,6 +20,10 @@
|
|||
; OSABI-AMDHSA-ASM: .text
|
||||
|
||||
; ALL-ASM-LABEL: {{^}}fsub:
|
||||
|
||||
; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
|
||||
; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
|
||||
|
||||
; OSABI-AMDHSA-ASM: s_endpgm
|
||||
; OSABI-AMDHSA-ASM: .section .rodata,#alloc
|
||||
; OSABI-AMDHSA-ASM: .p2align 6
|
||||
|
|
Loading…
Reference in New Issue