AMDGPU: Don't align callable functions to 256
llvm-svn: 300720
This commit is contained in:
parent
4c1ecded63
commit
6cb7b8a42f
|
@ -184,9 +184,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
|
|||
}
|
||||
|
||||
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
||||
|
||||
// The starting address of all shader programs must be 256-byte aligned.
|
||||
MF.setAlignment(8);
|
||||
// Regular functions just need the basic required instruction alignment.
|
||||
MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
|
||||
|
||||
SetupMachineFunction(MF);
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s
|
||||
|
||||
; ELF: Section {
|
||||
; ELF: Name: .text
|
||||
; ELF: SHF_ALLOC (0x2)
|
||||
; ELF: SHF_EXECINSTR (0x4)
|
||||
; ELF: AddressAlignment: 32
|
||||
; ELF: }
|
||||
|
||||
; HSA: .globl simple_align16
|
||||
; HSA: .p2align 5
|
||||
; Test function with an explicit `align 32` attribute: loads a pointer from
; %ptr.out, stores i32 0 through it, and returns. The HSA check above expects
; `.p2align 5` (2^5 = 32 bytes) and the ELF check expects AddressAlignment: 32.
; NOTE(review): the name says "align16" but the requested alignment is 32 --
; confirm whether the name is intentional.
define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
|
||||
entry:
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
|
@ -14,6 +14,7 @@
|
|||
; ELF: Flags [ (0x6)
|
||||
; ELF: SHF_ALLOC (0x2)
|
||||
; ELF: SHF_EXECINSTR (0x4)
|
||||
; ELF: AddressAlignment: 4
|
||||
; ELF: }
|
||||
|
||||
; ELF: SHT_NOTE
|
||||
|
@ -36,6 +37,8 @@
|
|||
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
|
||||
|
||||
; HSA-NOT: .amdgpu_hsa_kernel simple
|
||||
; HSA: .globl simple
|
||||
; HSA: .p2align 2
|
||||
; HSA: {{^}}simple:
|
||||
; HSA: .amd_kernel_code_t
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
|
@ -58,3 +61,13 @@ entry:
|
|||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ignore explicit alignment that is too low.
|
||||
; HSA: .globl simple_align2
|
||||
; HSA: .p2align 2
|
||||
; Test function with an explicit `align 2` attribute: loads a pointer from
; %ptr.out, stores i32 0 through it, and returns. The HSA check above expects
; `.p2align 2` (2^2 = 4 bytes), i.e. the emitted alignment is larger than the
; 2 bytes requested here.
define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
|
||||
entry:
|
||||
%out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue