R600: Correctly set the src value offset for scalarized kernel args
For some reason, this fixes v1i64 kernel arguments on pre-SI. It currently breaks some other cases in the kernel-args.ll test for R600, but I'm not particularly confident in the new output: VTX_READ_* is not used for some of the scalarized cases, and the code reading from the constant buffer doesn't make much sense to me. llvm-svn: 215564
This commit is contained in:
parent
5aa2194ea5
commit
74ef277774
|
@ -1705,8 +1705,13 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||||
|
|
||||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
||||||
CCValAssign &VA = ArgLocs[i];
|
CCValAssign &VA = ArgLocs[i];
|
||||||
EVT VT = Ins[i].VT;
|
const ISD::InputArg &In = Ins[i];
|
||||||
EVT MemVT = LocalIns[i].VT;
|
EVT VT = In.VT;
|
||||||
|
EVT MemVT = VA.getLocVT();
|
||||||
|
if (!VT.isVector() && MemVT.isVector()) {
|
||||||
|
// Get load source type if scalarized.
|
||||||
|
MemVT = MemVT.getVectorElementType();
|
||||||
|
}
|
||||||
|
|
||||||
if (ShaderType != ShaderType::COMPUTE) {
|
if (ShaderType != ShaderType::COMPUTE) {
|
||||||
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
||||||
|
@ -1716,7 +1721,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||||
}
|
}
|
||||||
|
|
||||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||||
AMDGPUAS::CONSTANT_BUFFER_0);
|
AMDGPUAS::CONSTANT_BUFFER_0);
|
||||||
|
|
||||||
// i64 isn't a legal type, so the register type used ends up as i32, which
|
// i64 isn't a legal type, so the register type used ends up as i32, which
|
||||||
// isn't expected here. It attempts to create this sextload, but it ends up
|
// isn't expected here. It attempts to create this sextload, but it ends up
|
||||||
|
@ -1725,15 +1730,28 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||||
|
|
||||||
// The first 36 bytes of the input buffer contains information about
|
// The first 36 bytes of the input buffer contains information about
|
||||||
// thread group and global sizes.
|
// thread group and global sizes.
|
||||||
|
ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
|
||||||
|
if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
|
||||||
|
// FIXME: This should really check the extload type, but the handling of
|
||||||
|
// extload vector parameters seems to be broken.
|
||||||
|
|
||||||
// FIXME: This should really check the extload type, but the handling of
|
// Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
|
||||||
// extload vecto parameters seems to be broken.
|
Ext = ISD::SEXTLOAD;
|
||||||
//ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
|
}
|
||||||
ISD::LoadExtType Ext = ISD::SEXTLOAD;
|
|
||||||
SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
|
// Compute the offset from the value.
|
||||||
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
|
// XXX - I think PartOffset should give you this, but it seems to give the
|
||||||
MachinePointerInfo(UndefValue::get(PtrTy)),
|
// size of the register which isn't useful.
|
||||||
MemVT, false, false, false, 4);
|
|
||||||
|
unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
|
||||||
|
unsigned PartOffset = VA.getLocMemOffset();
|
||||||
|
|
||||||
|
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
||||||
|
SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
|
||||||
|
DAG.getConstant(36 + PartOffset, MVT::i32),
|
||||||
|
DAG.getUNDEF(MVT::i32),
|
||||||
|
PtrInfo,
|
||||||
|
MemVT, false, true, true, 4);
|
||||||
|
|
||||||
// 4 is the preferred alignment for the CONSTANT memory space.
|
// 4 is the preferred alignment for the CONSTANT memory space.
|
||||||
InVals.push_back(Arg);
|
InVals.push_back(Arg);
|
||||||
|
|
|
@ -453,3 +453,21 @@ entry:
|
||||||
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
|
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: @kernel_arg_i64
|
||||||
|
; SI: S_LOAD_DWORDX2
|
||||||
|
; SI: S_LOAD_DWORDX2
|
||||||
|
; SI: BUFFER_STORE_DWORDX2
|
||||||
|
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
|
||||||
|
store i64 %a, i64 addrspace(1)* %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; XFUNC-LABEL: @kernel_arg_v1i64
|
||||||
|
; XSI: S_LOAD_DWORDX2
|
||||||
|
; XSI: S_LOAD_DWORDX2
|
||||||
|
; XSI: BUFFER_STORE_DWORDX2
|
||||||
|
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
|
||||||
|
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
|
||||||
|
; ret void
|
||||||
|
; }
|
||||||
|
|
Loading…
Reference in New Issue