AMDGPU: Actually write nops for writeNopData
Previously this just wrote zeros, which disassemble as `v_cndmask_b32 v0, s0, v0, vcc`. Write out an encoded s_nop instead. llvm-svn: 299816
This commit is contained in:
parent
bedaae0d06
commit
dd8fd9dcfd
|
@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
|
|||
}
|
||||
|
||||
bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
|
||||
OW->WriteZeros(Count);
|
||||
// If the count is not 4-byte aligned, we must be writing data into the text
|
||||
// section (otherwise we have unaligned instructions, and thus have far
|
||||
// bigger problems), so just write zeros instead.
|
||||
OW->WriteZeros(Count % 4);
|
||||
|
||||
// We are properly aligned, so write NOPs as requested.
|
||||
Count /= 4;
|
||||
|
||||
// FIXME: R600 support.
|
||||
// s_nop 0
|
||||
const uint32_t Encoded_S_NOP_0 = 0xbf800000;
|
||||
|
||||
for (uint64_t I = 0; I != Count; ++I)
|
||||
OW->write32(Encoded_S_NOP_0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s
|
||||
|
||||
; CHECK: kernel0:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; First kernel: an empty body (a single `ret void`) with the function aligned
; to 256 bytes. The directives that follow this definition expect the space
; between its s_endpgm and the start of kernel1 to disassemble as `s_nop 0`.
define amdgpu_kernel void @kernel0() align 256 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000
|
||||
|
||||
; CHECK-NEXT: {{^$}}
|
||||
; CHECK-NEXT: kernel1:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; Second kernel: also an empty body aligned to 256 bytes; its pointer argument
; is never used. NOTE(review): presumably it exists only so that padding is
; emitted after kernel0 -- confirm against the test's intent.
define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(2)* %ptr.out) align 256 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue