[AMDGPU] Allow int types for MUBUF vdata
Summary: Previously, the new llvm.amdgcn.raw/struct.buffer.load/store intrinsics only allowed float types for the data to be loaded or stored, which sometimes meant the frontend needed to generate a bitcast. In this respect, the new intrinsics copied the old buffer intrinsics. This commit extends the new intrinsics to allow int types as well. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D50315 Change-Id: I8202af2d036455553681dcbb3d7d32ae273f8f85 llvm-svn: 340270
This commit is contained in:
parent
4f703f5e11
commit
bb5ee41ab4
|
@ -823,7 +823,7 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore;
|
||||||
// These new intrinsics also keep the offset and soffset arguments separate as
|
// These new intrinsics also keep the offset and soffset arguments separate as
|
||||||
// they behave differently in bounds checking and swizzling.
|
// they behave differently in bounds checking and swizzling.
|
||||||
class AMDGPURawBufferLoad : Intrinsic <
|
class AMDGPURawBufferLoad : Intrinsic <
|
||||||
[llvm_anyfloat_ty],
|
[llvm_any_ty],
|
||||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||||
|
@ -834,7 +834,7 @@ def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
|
||||||
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
|
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
|
||||||
|
|
||||||
class AMDGPUStructBufferLoad : Intrinsic <
|
class AMDGPUStructBufferLoad : Intrinsic <
|
||||||
[llvm_anyfloat_ty],
|
[llvm_any_ty],
|
||||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||||
llvm_i32_ty, // vindex(VGPR)
|
llvm_i32_ty, // vindex(VGPR)
|
||||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||||
|
@ -847,7 +847,7 @@ def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
|
||||||
|
|
||||||
class AMDGPURawBufferStore : Intrinsic <
|
class AMDGPURawBufferStore : Intrinsic <
|
||||||
[],
|
[],
|
||||||
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
|
[llvm_any_ty, // vdata(VGPR)
|
||||||
llvm_v4i32_ty, // rsrc(SGPR)
|
llvm_v4i32_ty, // rsrc(SGPR)
|
||||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||||
|
@ -859,7 +859,7 @@ def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
|
||||||
|
|
||||||
class AMDGPUStructBufferStore : Intrinsic <
|
class AMDGPUStructBufferStore : Intrinsic <
|
||||||
[],
|
[],
|
||||||
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
|
[llvm_any_ty, // vdata(VGPR)
|
||||||
llvm_v4i32_ty, // rsrc(SGPR)
|
llvm_v4i32_ty, // rsrc(SGPR)
|
||||||
llvm_i32_ty, // vindex(VGPR)
|
llvm_i32_ty, // vindex(VGPR)
|
||||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||||
|
|
|
@ -1074,24 +1074,34 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
|
||||||
|
|
||||||
let SubtargetPredicate = HasUnpackedD16VMem in {
|
let SubtargetPredicate = HasUnpackedD16VMem in {
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
|
||||||
} // End HasUnpackedD16VMem.
|
} // End HasUnpackedD16VMem.
|
||||||
|
|
||||||
let SubtargetPredicate = HasPackedD16VMem in {
|
let SubtargetPredicate = HasPackedD16VMem in {
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
|
||||||
} // End HasPackedD16VMem.
|
} // End HasPackedD16VMem.
|
||||||
|
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
|
||||||
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
|
||||||
|
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
|
||||||
|
|
||||||
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||||
string opcode> {
|
string opcode> {
|
||||||
|
@ -1128,24 +1138,34 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
|
||||||
|
|
||||||
let SubtargetPredicate = HasUnpackedD16VMem in {
|
let SubtargetPredicate = HasUnpackedD16VMem in {
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
|
||||||
} // End HasUnpackedD16VMem.
|
} // End HasUnpackedD16VMem.
|
||||||
|
|
||||||
let SubtargetPredicate = HasPackedD16VMem in {
|
let SubtargetPredicate = HasPackedD16VMem in {
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
|
||||||
} // End HasPackedD16VMem.
|
} // End HasPackedD16VMem.
|
||||||
|
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
|
||||||
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
|
||||||
|
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// buffer_atomic patterns
|
// buffer_atomic patterns
|
||||||
|
|
|
@ -198,9 +198,31 @@ main_body:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK-LABEL: {{^}}buffer_load_int:
|
||||||
|
;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
|
||||||
|
;CHECK: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
|
||||||
|
;CHECK: buffer_load_dword v6, off, s[0:3], 0 slc
|
||||||
|
;CHECK: s_waitcnt
|
||||||
|
define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) {
|
||||||
|
main_body:
|
||||||
|
%data = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0)
|
||||||
|
%data_glc = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> %0, i32 0, i32 0, i32 1)
|
||||||
|
%data_slc = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %0, i32 0, i32 0, i32 2)
|
||||||
|
%fdata = bitcast <4 x i32> %data to <4 x float>
|
||||||
|
%fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
|
||||||
|
%fdata_slc = bitcast i32 %data_slc to float
|
||||||
|
%r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
|
||||||
|
%r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
|
||||||
|
%r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
|
||||||
|
ret {<4 x float>, <2 x float>, float} %r2
|
||||||
|
}
|
||||||
|
|
||||||
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
|
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
|
||||||
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
|
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
|
||||||
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
|
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
|
||||||
|
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #0
|
||||||
|
declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) #0
|
||||||
|
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||||
|
|
||||||
attributes #0 = { nounwind readonly }
|
attributes #0 = { nounwind readonly }
|
||||||
|
|
|
@ -142,9 +142,25 @@ define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK-LABEL: {{^}}buffer_store_int:
|
||||||
|
;CHECK-NOT: s_waitcnt
|
||||||
|
;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
|
||||||
|
;CHECK: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
|
||||||
|
;CHECK: buffer_store_dword v6, off, s[0:3], 0 slc
|
||||||
|
define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
|
||||||
|
main_body:
|
||||||
|
call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
|
||||||
|
call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
|
||||||
|
call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
|
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
|
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
|
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
|
||||||
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
|
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
|
||||||
|
|
||||||
attributes #0 = { nounwind }
|
attributes #0 = { nounwind }
|
||||||
|
|
|
@ -125,9 +125,31 @@ entry:
|
||||||
ret float %val
|
ret float %val
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK-LABEL: {{^}}buffer_load_int:
|
||||||
|
;CHECK: buffer_load_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen
|
||||||
|
;CHECK: buffer_load_dwordx2 v[4:5], {{v[0-9]+}}, s[0:3], 0 idxen glc
|
||||||
|
;CHECK: buffer_load_dword v6, {{v[0-9]+}}, s[0:3], 0 idxen slc
|
||||||
|
;CHECK: s_waitcnt
|
||||||
|
define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) {
|
||||||
|
main_body:
|
||||||
|
%data = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
|
||||||
|
%data_glc = call <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
|
||||||
|
%data_slc = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
|
||||||
|
%fdata = bitcast <4 x i32> %data to <4 x float>
|
||||||
|
%fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
|
||||||
|
%fdata_slc = bitcast i32 %data_slc to float
|
||||||
|
%r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
|
||||||
|
%r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
|
||||||
|
%r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
|
||||||
|
ret {<4 x float>, <2 x float>, float} %r2
|
||||||
|
}
|
||||||
|
|
||||||
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
|
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
|
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
|
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||||
|
|
||||||
attributes #0 = { nounwind readonly }
|
attributes #0 = { nounwind readonly }
|
||||||
|
|
|
@ -95,9 +95,25 @@ main_body:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK-LABEL: {{^}}buffer_store_int:
|
||||||
|
;CHECK-NOT: s_waitcnt
|
||||||
|
;CHECK: buffer_store_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen
|
||||||
|
;CHECK: buffer_store_dwordx2 v[4:5], {{v[0-9]+}}, s[0:3], 0 idxen glc
|
||||||
|
;CHECK: buffer_store_dword v6, {{v[0-9]+}}, s[0:3], 0 idxen slc
|
||||||
|
define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
|
||||||
|
main_body:
|
||||||
|
call void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
|
||||||
|
call void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
|
||||||
|
call void @llvm.amdgcn.struct.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
|
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.struct.buffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
|
declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
|
||||||
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
|
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
|
||||||
|
|
||||||
attributes #0 = { nounwind }
|
attributes #0 = { nounwind }
|
||||||
|
|
Loading…
Reference in New Issue