AMDGPU: Add num spilled s/vgprs to metadata

This information was requested for use by tools.

Differential Revision: https://reviews.llvm.org/D40321

llvm-svn: 319192
This commit is contained in:
Konstantin Zhuravlyov 2017-11-28 17:51:08 +00:00
parent 5011298958
commit 06ae4ec78e
6 changed files with 153 additions and 17 deletions

View File

@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
code is capable of
supporting XNACK. See
:ref:`amdgpu-target-features`.
"NumSpilledSGPRs" integer Number of stores from
a scalar register to
a register allocator
created spill
location.
"NumSpilledVGPRs" integer Number of stores from
a vector register to
a register allocator
created spill
location.
============================ ============== ========= =====================
..

View File

@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs";
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";
} // end namespace Key
/// \brief In-memory representation of kernel code properties metadata.
@ -275,6 +279,10 @@ struct Metadata final {
/// \brief True if the generated machine code is capable of supporting XNACK.
/// Optional.
bool mIsXNACKEnabled = false;
/// \brief Number of SGPRs spilled by a wavefront. Optional.
uint16_t mNumSpilledSGPRs = 0;
/// \brief Number of VGPRs spilled by a workitem. Optional.
uint16_t mNumSpilledVGPRs = 0;
/// \brief Default constructor.
Metadata() = default;

View File

@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {
MD.mIsDynamicCallStack, false);
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
MD.mIsXNACKEnabled, false);
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs,
MD.mNumSpilledSGPRs, uint16_t(0));
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs,
MD.mNumSpilledVGPRs, uint16_t(0));
}
};

View File

@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
return HSACodeProps;
}

View File

@ -1,26 +1,26 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
@var = addrspace(1) global float 0.0
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test
; CHECK: SymbolName: 'test@kd'
; CHECK: CodeProps:
; CHECK: KernargSegmentSize: 24
; CHECK: GroupSegmentFixedSize: 0
; CHECK: PrivateSegmentFixedSize: 0
; CHECK: KernargSegmentAlign: 8
; CHECK: WavefrontSize: 64
; GFX700: NumSGPRs: 6
; GFX800: NumSGPRs: 96
; GFX900: NumSGPRs: 6
; GFX700: NumVGPRs: 4
; GFX800: NumVGPRs: 6
; GFX900: NumVGPRs: 6
; CHECK: MaxFlatWorkGroupSize: 256
; CHECK: - Name: test
; CHECK: SymbolName: 'test@kd'
; CHECK: CodeProps:
; CHECK: KernargSegmentSize: 24
; CHECK: GroupSegmentFixedSize: 0
; CHECK: PrivateSegmentFixedSize: 0
; CHECK: KernargSegmentAlign: 8
; CHECK: WavefrontSize: 64
; CHECK: NumSGPRs: 6
; GFX700: NumVGPRs: 4
; GFX803: NumVGPRs: 6
; GFX900: NumVGPRs: 6
; CHECK: MaxFlatWorkGroupSize: 256
define amdgpu_kernel void @test(
half addrspace(1)* %r,
half addrspace(1)* %a,
@ -32,3 +32,111 @@ entry:
store half %r.val, half addrspace(1)* %r
ret void
}
; CHECK: - Name: num_spilled_sgprs
; CHECK: SymbolName: 'num_spilled_sgprs@kd'
; CHECK: CodeProps:
; CHECK: NumSpilledSGPRs: 41
; Kernel taking 16 global-memory i32 pointers (%out0..%outf) and 16 i32 values
; (%in0..%inf); each value is stored through the pointer with the same suffix.
; It uses attribute group #0, which sets "amdgpu-num-sgpr"="14" -- presumably
; this restricts the scalar register budget enough that the arguments cannot
; all stay in SGPRs, so the spilled-SGPR count reported in the kernel metadata
; is non-zero (confirm against the AMDGPU backend documentation).
define amdgpu_kernel void @num_spilled_sgprs(
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2,
i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5,
i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8,
i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb,
i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute,
i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4,
i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb,
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
entry:
; One store per (value, pointer) pair, in argument order.
store i32 %in0, i32 addrspace(1)* %out0
store i32 %in1, i32 addrspace(1)* %out1
store i32 %in2, i32 addrspace(1)* %out2
store i32 %in3, i32 addrspace(1)* %out3
store i32 %in4, i32 addrspace(1)* %out4
store i32 %in5, i32 addrspace(1)* %out5
store i32 %in6, i32 addrspace(1)* %out6
store i32 %in7, i32 addrspace(1)* %out7
store i32 %in8, i32 addrspace(1)* %out8
store i32 %in9, i32 addrspace(1)* %out9
store i32 %ina, i32 addrspace(1)* %outa
store i32 %inb, i32 addrspace(1)* %outb
store i32 %inc, i32 addrspace(1)* %outc
store i32 %ind, i32 addrspace(1)* %outd
store i32 %ine, i32 addrspace(1)* %oute
store i32 %inf, i32 addrspace(1)* %outf
ret void
}
; CHECK: - Name: num_spilled_vgprs
; CHECK: SymbolName: 'num_spilled_vgprs@kd'
; CHECK: CodeProps:
; CHECK: NumSpilledVGPRs: 14
; Kernel that performs 31 volatile float loads from @var (%val0..%val30) and
; only afterwards 31 volatile stores of those values back to @var, so every
; loaded value is still needed when the last load is issued.
; It uses attribute group #1, which sets "amdgpu-num-vgpr"="20" -- presumably
; this restricts the vector register budget below the number of simultaneously
; live values, so the spilled-VGPR count reported in the kernel metadata is
; non-zero (confirm against the AMDGPU backend documentation).
define amdgpu_kernel void @num_spilled_vgprs() #1 {
; Phase 1: load all 31 values before any of them is consumed.
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
%val3 = load volatile float, float addrspace(1)* @var
%val4 = load volatile float, float addrspace(1)* @var
%val5 = load volatile float, float addrspace(1)* @var
%val6 = load volatile float, float addrspace(1)* @var
%val7 = load volatile float, float addrspace(1)* @var
%val8 = load volatile float, float addrspace(1)* @var
%val9 = load volatile float, float addrspace(1)* @var
%val10 = load volatile float, float addrspace(1)* @var
%val11 = load volatile float, float addrspace(1)* @var
%val12 = load volatile float, float addrspace(1)* @var
%val13 = load volatile float, float addrspace(1)* @var
%val14 = load volatile float, float addrspace(1)* @var
%val15 = load volatile float, float addrspace(1)* @var
%val16 = load volatile float, float addrspace(1)* @var
%val17 = load volatile float, float addrspace(1)* @var
%val18 = load volatile float, float addrspace(1)* @var
%val19 = load volatile float, float addrspace(1)* @var
%val20 = load volatile float, float addrspace(1)* @var
%val21 = load volatile float, float addrspace(1)* @var
%val22 = load volatile float, float addrspace(1)* @var
%val23 = load volatile float, float addrspace(1)* @var
%val24 = load volatile float, float addrspace(1)* @var
%val25 = load volatile float, float addrspace(1)* @var
%val26 = load volatile float, float addrspace(1)* @var
%val27 = load volatile float, float addrspace(1)* @var
%val28 = load volatile float, float addrspace(1)* @var
%val29 = load volatile float, float addrspace(1)* @var
%val30 = load volatile float, float addrspace(1)* @var
; Phase 2: store the values back in the same order they were loaded.
store volatile float %val0, float addrspace(1)* @var
store volatile float %val1, float addrspace(1)* @var
store volatile float %val2, float addrspace(1)* @var
store volatile float %val3, float addrspace(1)* @var
store volatile float %val4, float addrspace(1)* @var
store volatile float %val5, float addrspace(1)* @var
store volatile float %val6, float addrspace(1)* @var
store volatile float %val7, float addrspace(1)* @var
store volatile float %val8, float addrspace(1)* @var
store volatile float %val9, float addrspace(1)* @var
store volatile float %val10, float addrspace(1)* @var
store volatile float %val11, float addrspace(1)* @var
store volatile float %val12, float addrspace(1)* @var
store volatile float %val13, float addrspace(1)* @var
store volatile float %val14, float addrspace(1)* @var
store volatile float %val15, float addrspace(1)* @var
store volatile float %val16, float addrspace(1)* @var
store volatile float %val17, float addrspace(1)* @var
store volatile float %val18, float addrspace(1)* @var
store volatile float %val19, float addrspace(1)* @var
store volatile float %val20, float addrspace(1)* @var
store volatile float %val21, float addrspace(1)* @var
store volatile float %val22, float addrspace(1)* @var
store volatile float %val23, float addrspace(1)* @var
store volatile float %val24, float addrspace(1)* @var
store volatile float %val25, float addrspace(1)* @var
store volatile float %val26, float addrspace(1)* @var
store volatile float %val27, float addrspace(1)* @var
store volatile float %val28, float addrspace(1)* @var
store volatile float %val29, float addrspace(1)* @var
store volatile float %val30, float addrspace(1)* @var
ret void
}
; Attribute groups referenced by the spill kernels in this file:
; #0 is attached to @num_spilled_sgprs, #1 to @num_spilled_vgprs.
; NOTE(review): the values 14 and 20 look chosen to be small enough to force
; spilling; the expected spill counts in this test likely depend on them.
attributes #0 = { "amdgpu-num-sgpr"="14" }
attributes #1 = { "amdgpu-num-vgpr"="20" }

View File

@ -14,6 +14,8 @@
// CHECK: KernargSegmentAlign: 16
// CHECK: WavefrontSize: 64
// CHECK: MaxFlatWorkGroupSize: 256
// CHECK: NumSpilledSGPRs: 1
// CHECK: NumSpilledVGPRs: 1
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
@ -27,4 +29,6 @@
KernargSegmentAlign: 16
WavefrontSize: 64
MaxFlatWorkGroupSize: 256
NumSpilledSGPRs: 1
NumSpilledVGPRs: 1
.end_amd_amdgpu_hsa_metadata