[AMDGPU] Fix pointer info for pseudo source for r600

The pointer info for pseudo source for r600 is not correct when
alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D39670

llvm-svn: 317861
This commit is contained in:
Yaxun Liu 2017-11-10 01:53:24 +00:00
parent 9278019eb3
commit 920cc2f813
6 changed files with 109 additions and 87 deletions

View File

@ -1493,3 +1493,21 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
FlagOp.setImm(InstFlags);
}
}
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
PseudoSourceValue::PSVKind Kind) const {
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
return AMDGPUASI.PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
return AMDGPUASI.CONSTANT_ADDRESS;
}
llvm_unreachable("Invalid pseudo source kind");
return AMDGPUASI.PRIVATE_ADDRESS;
}

View File

@ -318,6 +318,9 @@ public:
bool isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
}
unsigned getAddressSpaceForPseudoSourceKind(
PseudoSourceValue::PSVKind Kind) const override;
};
namespace AMDGPU {

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}constant_load_i1:
; GCN: buffer_load_ubyte

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_i1:
; GCN: buffer_load_ubyte

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}local_load_i1:
; GCN: ds_read_u8

View File

@ -1,9 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
target datalayout = "A5"
; OPT-LABEL: @vector_read(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
@ -17,17 +18,17 @@
; EG: MOVA_INT
define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [4 x i32]
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32* %tmp1
%tmp = alloca [4 x i32], addrspace(5)
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
store i32 0, i32 addrspace(5)* %x
store i32 1, i32 addrspace(5)* %y
store i32 2, i32 addrspace(5)* %z
store i32 3, i32 addrspace(5)* %w
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
@ -46,19 +47,19 @@ entry:
; EG: MOVA_INT
define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%tmp = alloca [4 x i32]
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
store i32 1, i32* %tmp1
%tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
%tmp3 = load i32, i32* %tmp2
%tmp = alloca [4 x i32], addrspace(5)
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
store i32 0, i32 addrspace(5)* %x
store i32 0, i32 addrspace(5)* %y
store i32 0, i32 addrspace(5)* %z
store i32 0, i32 addrspace(5)* %w
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index
store i32 1, i32 addrspace(5)* %tmp1
%tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index
%tmp3 = load i32, i32 addrspace(5)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@ -73,19 +74,19 @@ entry:
; EG: STORE_RAW
define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%tmp = alloca [4 x i32]
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%tmp2 = bitcast i32* %tmp1 to [4 x i32]*
%tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
%tmp4 = load i32, i32* %tmp3
%tmp = alloca [4 x i32], addrspace(5)
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
store i32 0, i32 addrspace(5)* %x
store i32 0, i32 addrspace(5)* %y
store i32 0, i32 addrspace(5)* %z
store i32 0, i32 addrspace(5)* %w
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
%tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
%tmp4 = load i32, i32 addrspace(5)* %tmp3
store i32 %tmp4, i32 addrspace(1)* %out
ret void
}
@ -95,18 +96,18 @@ entry:
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [4 x i32]
%x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
%y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
%w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
%bc = bitcast i32* %x to float*
store float 1.0, float* %bc
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
%tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32* %tmp1
%tmp = alloca [4 x i32], addrspace(5)
%x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
%y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
%w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
%bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*
store float 1.0, float addrspace(5)* %bc
store i32 1, i32 addrspace(5)* %y
store i32 2, i32 addrspace(5)* %z
store i32 3, i32 addrspace(5)* %w
%tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
@ -123,18 +124,18 @@ entry:
; OPT: load float
define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [4 x i32]
%tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
%x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
%y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
%z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
%w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
store float 0.0, float* %x
store float 1.0, float* %y
store float 2.0, float* %z
store float 4.0, float* %w
%tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
%tmp2 = load float, float* %tmp1
%tmp = alloca [4 x i32], addrspace(5)
%tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*
%x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0
%y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1
%z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2
%w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3
store float 0.0, float addrspace(5)* %x
store float 1.0, float addrspace(5)* %y
store float 2.0, float addrspace(5)* %z
store float 4.0, float addrspace(5)* %w
%tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index
%tmp2 = load float, float addrspace(5)* %tmp1
store float %tmp2, float addrspace(1)* %out
ret void
}
@ -146,17 +147,17 @@ entry:
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
entry:
%tmp = alloca [4 x i32]
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
store i32 0, i32* %x
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32* %tmp1
%tmp = alloca [4 x i32], addrspace(5)
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
store i32 0, i32 addrspace(5)* %x
store i32 1, i32 addrspace(5)* %y
store i32 2, i32 addrspace(5)* %z
store i32 3, i32 addrspace(5)* %w
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
%tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}