[AMDGPU] Split R600/SI getFrameIndexReference and emit stack object offsets for SI
Differential Revision: https://reviews.llvm.org/D29674 llvm-svn: 297499
This commit is contained in:
parent
874d26a89d
commit
ffdb00eda9
|
@ -12,11 +12,6 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUFrameLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
|
||||
|
@ -69,34 +64,3 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
|
|||
// T1.W = stack[1].w
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// \returns The number of registers allocated for \p FI.
|
||||
int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
int FI,
|
||||
unsigned &FrameReg) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const AMDGPURegisterInfo *RI
|
||||
= MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
|
||||
|
||||
// Fill in FrameReg output argument.
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
|
||||
// Start the offset at 2 so we don't overwrite work group information.
|
||||
// XXX: We should only do this when the shader actually uses this
|
||||
// information.
|
||||
unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
|
||||
int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
|
||||
|
||||
for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
|
||||
OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
|
||||
OffsetBytes += MFI.getObjectSize(i);
|
||||
// Each register holds 4 bytes, so we must always align the offset to at
|
||||
// least 4 bytes, so that 2 frame objects won't share the same register.
|
||||
OffsetBytes = alignTo(OffsetBytes, 4);
|
||||
}
|
||||
|
||||
if (FI != -1)
|
||||
OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
|
||||
|
||||
return OffsetBytes / (getStackWidth(MF) * 4);
|
||||
}
|
||||
|
|
|
@ -34,9 +34,6 @@ public:
|
|||
/// values to the stack.
|
||||
unsigned getStackWidth(const MachineFunction &MF) const;
|
||||
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
|
||||
/// Unconditionally returns false; \p MF is not inspected.
bool hasFP(const MachineFunction &MF) const override {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,43 @@
|
|||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600FrameLowering.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
R600FrameLowering::~R600FrameLowering() = default;
|
||||
|
||||
/// \returns The number of registers allocated for \p FI.
|
||||
int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
int FI,
|
||||
unsigned &FrameReg) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const R600RegisterInfo *RI
|
||||
= MF.getSubtarget<R600Subtarget>().getRegisterInfo();
|
||||
|
||||
// Fill in FrameReg output argument.
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
|
||||
// Start the offset at 2 so we don't overwrite work group information.
|
||||
// FIXME: We should only do this when the shader actually uses this
|
||||
// information.
|
||||
unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
|
||||
int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
|
||||
|
||||
for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
|
||||
OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
|
||||
OffsetBytes += MFI.getObjectSize(i);
|
||||
// Each register holds 4 bytes, so we must always align the offset to at
|
||||
// least 4 bytes, so that 2 frame objects won't share the same register.
|
||||
OffsetBytes = alignTo(OffsetBytes, 4);
|
||||
}
|
||||
|
||||
if (FI != -1)
|
||||
OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
|
||||
|
||||
return OffsetBytes / (getStackWidth(MF) * 4);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,8 @@ public:
|
|||
MachineBasicBlock &MBB) const override {}
|
||||
/// Empty override: the body contains no statements, so neither \p MF nor
/// \p MBB is modified here.
void emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const override {}
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -393,6 +393,14 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
|
|||
return true;
|
||||
}
|
||||
|
||||
int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const {
|
||||
const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
|
||||
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
return MF.getFrameInfo().getObjectOffset(FI);
|
||||
}
|
||||
|
||||
void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF,
|
||||
RegScavenger *RS) const {
|
||||
|
|
|
@ -30,6 +30,8 @@ public:
|
|||
MachineBasicBlock &MBB) const override;
|
||||
void emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const override;
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
|
||||
void processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF,
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
|
||||
|
||||
; LLVM IR generated with the following command and OpenCL source:
|
||||
;
|
||||
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
|
||||
;
|
||||
; global int GlobA;
|
||||
; global int GlobB;
|
||||
;
|
||||
; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
|
||||
; ArgA[ArgN] += ArgB[ArgN];
|
||||
; }
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||
|
||||
; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
|
||||
@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0
|
||||
; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
|
||||
@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6
|
||||
|
||||
define amdgpu_kernel void @kernel1(
|
||||
; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 04 10 01 16 18 )
|
||||
i32 %ArgN,
|
||||
; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 01 16 18 )
|
||||
i32 addrspace(1)* %ArgA,
|
||||
; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 10 10 01 16 18 )
|
||||
i32 addrspace(1)* %ArgB) !dbg !13 {
|
||||
entry:
|
||||
%ArgN.addr = alloca i32, align 4
|
||||
%ArgA.addr = alloca i32 addrspace(1)*, align 4
|
||||
%ArgB.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 %ArgN, i32* %ArgN.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24
|
||||
store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26
|
||||
store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28
|
||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29
|
||||
%1 = load i32, i32* %ArgN.addr, align 4, !dbg !30
|
||||
%idxprom = zext i32 %1 to i64, !dbg !29
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29
|
||||
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29
|
||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31
|
||||
%4 = load i32, i32* %ArgN.addr, align 4, !dbg !32
|
||||
%idxprom1 = zext i32 %4 to i64, !dbg !31
|
||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31
|
||||
%5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
|
||||
%add = add nsw i32 %5, %2, !dbg !33
|
||||
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
|
||||
ret void, !dbg !34
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!2}
|
||||
!opencl.ocl.version = !{!9}
|
||||
!llvm.module.flags = !{!10, !11}
|
||||
!llvm.ident = !{!12}
|
||||
|
||||
!0 = !DIGlobalVariableExpression(var: !1)
|
||||
!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true)
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
|
||||
!3 = !DIFile(filename: "variable-locations-dwarf-v1.cl", directory: "/some/random/directory")
|
||||
!4 = !{}
|
||||
!5 = !{!0, !6}
|
||||
!6 = !DIGlobalVariableExpression(var: !7)
|
||||
!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
|
||||
!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!9 = !{i32 2, i32 0}
|
||||
!10 = !{i32 2, !"Dwarf Version", i32 1}
|
||||
!11 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!12 = !{!"clang version 5.0.0"}
|
||||
!13 = distinct !DISubprogram(name: "kernel1", scope: !3, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4)
|
||||
!14 = !DISubroutineType(types: !15)
|
||||
!15 = !{null, !16, !17, !17}
|
||||
!16 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
|
||||
!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
|
||||
!18 = !{i32 0, i32 1, i32 1}
|
||||
!19 = !{!"none", !"none", !"none"}
|
||||
!20 = !{!"uint", !"int*", !"int*"}
|
||||
!21 = !{!"", !"", !""}
|
||||
!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16)
|
||||
!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
|
||||
!24 = !DILocation(line: 4, column: 34, scope: !13)
|
||||
!25 = !DILocalVariable(name: "ArgA", arg: 2, scope: !13, file: !3, line: 4, type: !17)
|
||||
!26 = !DILocation(line: 4, column: 52, scope: !13)
|
||||
!27 = !DILocalVariable(name: "ArgB", arg: 3, scope: !13, file: !3, line: 4, type: !17)
|
||||
!28 = !DILocation(line: 4, column: 70, scope: !13)
|
||||
!29 = !DILocation(line: 5, column: 17, scope: !13)
|
||||
!30 = !DILocation(line: 5, column: 22, scope: !13)
|
||||
!31 = !DILocation(line: 5, column: 3, scope: !13)
|
||||
!32 = !DILocation(line: 5, column: 8, scope: !13)
|
||||
!33 = !DILocation(line: 5, column: 14, scope: !13)
|
||||
!34 = !DILocation(line: 6, column: 1, scope: !13)
|
|
@ -0,0 +1,111 @@
|
|||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
|
||||
|
||||
; LLVM IR generated with the following command and OpenCL source:
|
||||
;
|
||||
; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
|
||||
;
|
||||
; global int GlobA;
|
||||
; global int GlobB;
|
||||
;
|
||||
; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) {
|
||||
; ArgA[ArgN] += ArgB[ArgN];
|
||||
; }
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||
|
||||
; CHECK: {{.*}}DW_TAG_variable
|
||||
; CHECK-NEXT: DW_AT_name {{.*}}"GlobA"
|
||||
; CHECK-NEXT: DW_AT_type
|
||||
; CHECK-NEXT: DW_AT_external
|
||||
; CHECK-NEXT: DW_AT_decl_file
|
||||
; CHECK-NEXT: DW_AT_decl_line
|
||||
; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
|
||||
@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0
|
||||
|
||||
; CHECK: {{.*}}DW_TAG_variable
|
||||
; CHECK-NEXT: DW_AT_name {{.*}}"GlobB"
|
||||
; CHECK-NEXT: DW_AT_type
|
||||
; CHECK-NEXT: DW_AT_external
|
||||
; CHECK-NEXT: DW_AT_decl_file
|
||||
; CHECK-NEXT: DW_AT_decl_line
|
||||
; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 )
|
||||
@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6
|
||||
|
||||
define amdgpu_kernel void @kernel1(
|
||||
; CHECK: {{.*}}DW_TAG_formal_parameter
|
||||
; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x06> 91 04 10 01 16 18 )
|
||||
; CHECK-NEXT: DW_AT_name {{.*}}"ArgN"
|
||||
i32 %ArgN,
|
||||
; CHECK: {{.*}}DW_TAG_formal_parameter
|
||||
; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 01 16 18 )
|
||||
; CHECK-NEXT: DW_AT_name {{.*}}"ArgA"
|
||||
i32 addrspace(1)* %ArgA,
|
||||
; CHECK: {{.*}}DW_TAG_formal_parameter
|
||||
; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x06> 91 10 10 01 16 18 )
|
||||
; CHECK-NEXT: DW_AT_name {{.*}}"ArgB"
|
||||
i32 addrspace(1)* %ArgB) !dbg !13 {
|
||||
entry:
|
||||
%ArgN.addr = alloca i32, align 4
|
||||
%ArgA.addr = alloca i32 addrspace(1)*, align 4
|
||||
%ArgB.addr = alloca i32 addrspace(1)*, align 4
|
||||
store i32 %ArgN, i32* %ArgN.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24
|
||||
store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26
|
||||
store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4
|
||||
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28
|
||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29
|
||||
%1 = load i32, i32* %ArgN.addr, align 4, !dbg !30
|
||||
%idxprom = zext i32 %1 to i64, !dbg !29
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29
|
||||
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29
|
||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31
|
||||
%4 = load i32, i32* %ArgN.addr, align 4, !dbg !32
|
||||
%idxprom1 = zext i32 %4 to i64, !dbg !31
|
||||
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31
|
||||
%5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
|
||||
%add = add nsw i32 %5, %2, !dbg !33
|
||||
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33
|
||||
ret void, !dbg !34
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!2}
|
||||
!opencl.ocl.version = !{!9}
|
||||
!llvm.module.flags = !{!10, !11}
|
||||
!llvm.ident = !{!12}
|
||||
|
||||
!0 = !DIGlobalVariableExpression(var: !1)
|
||||
!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true)
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
|
||||
!3 = !DIFile(filename: "variable-locations.cl", directory: "/some/random/directory")
|
||||
!4 = !{}
|
||||
!5 = !{!0, !6}
|
||||
!6 = !DIGlobalVariableExpression(var: !7)
|
||||
!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
|
||||
!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!9 = !{i32 2, i32 0}
|
||||
!10 = !{i32 2, !"Dwarf Version", i32 2}
|
||||
!11 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!12 = !{!"clang version 5.0.0"}
|
||||
!13 = distinct !DISubprogram(name: "kernel1", scope: !3, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4)
|
||||
!14 = !DISubroutineType(types: !15)
|
||||
!15 = !{null, !16, !17, !17}
|
||||
!16 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
|
||||
!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
|
||||
!18 = !{i32 0, i32 1, i32 1}
|
||||
!19 = !{!"none", !"none", !"none"}
|
||||
!20 = !{!"uint", !"int*", !"int*"}
|
||||
!21 = !{!"", !"", !""}
|
||||
!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16)
|
||||
!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
|
||||
!24 = !DILocation(line: 4, column: 34, scope: !13)
|
||||
!25 = !DILocalVariable(name: "ArgA", arg: 2, scope: !13, file: !3, line: 4, type: !17)
|
||||
!26 = !DILocation(line: 4, column: 52, scope: !13)
|
||||
!27 = !DILocalVariable(name: "ArgB", arg: 3, scope: !13, file: !3, line: 4, type: !17)
|
||||
!28 = !DILocation(line: 4, column: 70, scope: !13)
|
||||
!29 = !DILocation(line: 5, column: 17, scope: !13)
|
||||
!30 = !DILocation(line: 5, column: 22, scope: !13)
|
||||
!31 = !DILocation(line: 5, column: 3, scope: !13)
|
||||
!32 = !DILocation(line: 5, column: 8, scope: !13)
|
||||
!33 = !DILocation(line: 5, column: 14, scope: !13)
|
||||
!34 = !DILocation(line: 6, column: 1, scope: !13)
|
Loading…
Reference in New Issue