[AMDGPU] Re-enabled 128-bit wide-vector generation for the local address space by default.
Summary: The bug reported at https://bugs.freedesktop.org/show_bug.cgi?id=105464 was found to be resolved by other fixes. Author: FarhanaAleen llvm-svn: 333380
This commit is contained in:
parent
07ac63f89e
commit
eacb1020aa
|
@ -267,12 +267,10 @@ unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
|
||||||
return 512;
|
return 512;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (AddrSpace == AS.FLAT_ADDRESS)
|
if (AddrSpace == AS.FLAT_ADDRESS ||
|
||||||
return 128;
|
AddrSpace == AS.LOCAL_ADDRESS ||
|
||||||
|
|
||||||
if (AddrSpace == AS.LOCAL_ADDRESS ||
|
|
||||||
AddrSpace == AS.REGION_ADDRESS)
|
AddrSpace == AS.REGION_ADDRESS)
|
||||||
return ST->useDS128() ? 128 : 64;
|
return 128;
|
||||||
|
|
||||||
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
if (AddrSpace == AS.PRIVATE_ADDRESS)
|
||||||
return 8 * ST->getMaxPrivateElementSize();
|
return 8 * ST->getMaxPrivateElementSize();
|
||||||
|
|
|
@ -504,8 +504,7 @@ define amdgpu_kernel void @merge_local_store_2_constants_i32_align_2(i32 addrspa
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: @merge_local_store_4_constants_i32
|
; CHECK-LABEL: @merge_local_store_4_constants_i32
|
||||||
; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)*
|
; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(3)*
|
||||||
; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)*
|
|
||||||
define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
|
define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
|
||||||
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
|
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
|
||||||
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
|
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
|
||||||
|
|
|
@ -29,11 +29,10 @@ define amdgpu_kernel void @no_crash(i32 %arg) {
|
||||||
; longest chain vectorized
|
; longest chain vectorized
|
||||||
|
|
||||||
; CHECK-LABEL: @interleave_get_longest
|
; CHECK-LABEL: @interleave_get_longest
|
||||||
; CHECK: load <2 x i32>
|
; CHECK: load <4 x i32>
|
||||||
; CHECK: load i32
|
; CHECK: load i32
|
||||||
; CHECK: store <2 x i32> zeroinitializer
|
; CHECK: store <2 x i32> zeroinitializer
|
||||||
; CHECK: load i32
|
; CHECK: load i32
|
||||||
; CHECK: load <2 x i32>
|
|
||||||
; CHECK: load i32
|
; CHECK: load i32
|
||||||
; CHECK: load i32
|
; CHECK: load i32
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue