From aa067cb9fb3ac5771d734296d683a886307a8129 Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Thu, 4 Oct 2018 21:02:16 +0000 Subject: [PATCH] AMDGPU: Rename isAmdCodeObjectV2 -> isAmdHsaOrMesa isAmdCodeObjectV2 is a misleading name: the function actually checks whether the OS is amdhsa or mesa. Also add a test to make sure we do not generate the old kernel header for code object v3. Differential Revision: https://reviews.llvm.org/D52897 llvm-svn: 343813 --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 10 ++++------ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 ++-- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 8 ++++---- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +++--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 6 +++--- llvm/test/CodeGen/AMDGPU/code-object-v3.ll | 8 ++++++++ 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 80a29a886338..7e6a406b1e34 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -193,13 +193,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const SIMachineFunctionInfo &MFI = *MF->getInfo(); if (!MFI.isEntryFunction()) return; - if (IsaInfo::hasCodeObjectV3(getSTI()) && - TM.getTargetTriple().getOS() == Triple::AMDHSA) - return; const GCNSubtarget &STM = MF->getSubtarget(); const Function &F = MF->getFunction(); - if (STM.isAmdCodeObjectV2(F) && + if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { amd_kernel_code_t KernelCode; @@ -210,7 +207,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); + if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS()) + HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); } void 
AMDGPUAsmPrinter::EmitFunctionBodyEnd() { @@ -259,7 +257,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const GCNSubtarget &STM = MF->getSubtarget(); - if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) { + if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) { SmallString<128> SymbolName; getNameWithPrefix(SymbolName, &MF->getFunction()), getTargetStreamer()->EmitAMDGPUSymbolType( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 8bd6415c9953..fb39dc4493cf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -135,7 +135,7 @@ public: return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv()); } - bool isAmdCodeObjectV2(const Function &F) const { + bool isAmdHsaOrMesa(const Function &F) const { return isAmdHsaOS() || isMesaKernel(F); } @@ -202,7 +202,7 @@ public: /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const Function &F) const { - return isAmdCodeObjectV2(F) ? 0 : 36; + return isAmdHsaOrMesa(F) ? 
0 : 36; } /// \returns Maximum number of work groups per compute unit supported by the diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index ac0ef90f25a4..e4633c88e18f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -289,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2(F)) { + if (ST.isAmdHsaOrMesa(F)) { PreloadedPrivateBufferReg = MFI->getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); } @@ -308,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { - assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F)); + assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)); MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -333,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, bool CopyBuffer = ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister && - ST.isAmdCodeObjectV2(F) && + ST.isAmdHsaOrMesa(F) && ScratchRsrcReg != PreloadedPrivateBufferReg; // This needs to be careful of the copying order to avoid overwriting one of @@ -433,7 +433,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, } if (ST.isMesaGfxShader(Fn) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) { - assert(!ST.isAmdCodeObjectV2(Fn)); + assert(!ST.isAmdHsaOrMesa(Fn)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 7f1467c48dd4..062232873961 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1653,7 +1653,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, bool RequiresStackAccess = HasStackObjects || MFI.hasCalls(); const GCNSubtarget &ST = MF.getSubtarget(); - if (ST.isAmdCodeObjectV2(MF.getFunction())) { + if (ST.isAmdHsaOrMesa(MF.getFunction())) { if (RequiresStackAccess) { // If we have stack objects, we unquestionably need the private buffer // resource. For the Code Object V2 ABI, this will be the first 4 user @@ -4809,14 +4809,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_implicit_buffer_ptr: { - if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction())) + if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction())) return emitNonHSAIntrinsicError(DAG, DL, VT); return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR); } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) { + if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) { DiagnosticInfoUnsupported BadIntrin( MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 0d5ff75e37ed..ee1ff85523ad 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -137,8 +137,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) } } - bool IsCOV2 = ST.isAmdCodeObjectV2(F); - if (IsCOV2) { + bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); + if (isAmdHsaOrMesa) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -158,7 +158,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) KernargSegmentPtr = true; - if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) { + if 
(ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) { // TODO: This could be refined a lot. The attribute is a poor way of // detecting calls that may require it before argument lowering. if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch")) diff --git a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll index 2291527f7771..3f13cebc1583 100644 --- a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll @@ -2,6 +2,10 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | llvm-readobj -elf-output-style=GNU -notes -relocations -sections -symbols | FileCheck --check-prefixes=ALL-ELF,OSABI-AMDHSA-ELF %s ; ALL-ASM-LABEL: {{^}}fadd: + +; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel +; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t + ; OSABI-AMDHSA-ASM: s_endpgm ; OSABI-AMDHSA-ASM: .section .rodata,#alloc ; OSABI-AMDHSA-ASM: .p2align 6 @@ -16,6 +20,10 @@ ; OSABI-AMDHSA-ASM: .text ; ALL-ASM-LABEL: {{^}}fsub: + +; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel +; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t + ; OSABI-AMDHSA-ASM: s_endpgm ; OSABI-AMDHSA-ASM: .section .rodata,#alloc ; OSABI-AMDHSA-ASM: .p2align 6