R600/SI: Add support for buffer stores v2
v2: Use the ADDR64 bit. Reviewed-by: Christian König <christian.koenig@amd.com>. llvm-svn: 178931
This commit is contained in:
parent
6db08eb42f
commit
754f80ff3a
|
@ -32,8 +32,14 @@ def CC_SI : CallingConv<[
|
||||||
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
|
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
|
||||||
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
|
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
|
||||||
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
|
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
|
||||||
]>>>
|
]>>>,
|
||||||
|
|
||||||
|
// This is the default for i64 values.
|
||||||
|
// XXX: We should change this once clang understands the CC_AMDGPU.
|
||||||
|
CCIfType<[i64], CCAssignToRegWithShadow<
|
||||||
|
[ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
|
||||||
|
[ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
|
||||||
|
>>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def CC_AMDGPU : CallingConv<[
|
def CC_AMDGPU : CallingConv<[
|
||||||
|
|
|
@ -116,6 +116,7 @@ enum {
|
||||||
BRANCH_COND,
|
BRANCH_COND,
|
||||||
// End AMDIL ISD Opcodes
|
// End AMDIL ISD Opcodes
|
||||||
BITALIGN,
|
BITALIGN,
|
||||||
|
BUFFER_STORE,
|
||||||
DWORDADDR,
|
DWORDADDR,
|
||||||
FRACT,
|
FRACT,
|
||||||
FMAX,
|
FMAX,
|
||||||
|
|
|
@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||||
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
|
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
|
||||||
RegSeqArgs, 2 * N->getNumOperands() + 1);
|
RegSeqArgs, 2 * N->getNumOperands() + 1);
|
||||||
}
|
}
|
||||||
|
case ISD::BUILD_PAIR: {
|
||||||
|
SDValue RC, SubReg0, SubReg1;
|
||||||
|
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||||
|
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (N->getValueType(0) == MVT::i128) {
|
||||||
|
RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
|
||||||
|
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
|
||||||
|
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
|
||||||
|
} else if (N->getValueType(0) == MVT::i64) {
|
||||||
|
RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
|
||||||
|
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
|
||||||
|
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
|
||||||
|
} else {
|
||||||
|
llvm_unreachable("Unhandled value type for BUILD_PAIR");
|
||||||
|
}
|
||||||
|
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
|
||||||
|
N->getOperand(1), SubReg1 };
|
||||||
|
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
|
||||||
|
N->getDebugLoc(), N->getValueType(0), Ops, 5);
|
||||||
|
}
|
||||||
|
|
||||||
case ISD::ConstantFP:
|
case ISD::ConstantFP:
|
||||||
case ISD::Constant: {
|
case ISD::Constant: {
|
||||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||||
|
|
|
@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||||
|
|
||||||
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
|
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
|
||||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||||
|
addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
|
||||||
|
|
||||||
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
|
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
|
||||||
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
|
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
|
||||||
|
@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
|
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
|
||||||
|
|
||||||
|
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
||||||
|
setOperationAction(ISD::STORE, MVT::i64, Custom);
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::SELECT_CC);
|
setTargetDAGCombine(ISD::SELECT_CC);
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::SETCC);
|
setTargetDAGCombine(ISD::SETCC);
|
||||||
|
@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||||
|
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
||||||
return Chain;
|
return Chain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Value used as the second operand of the BUILD_PAIR that forms the i128
// resource operand in LowerSTORE below.
// NOTE(review): presumably encodes the data-format field of the buffer
// resource descriptor -- confirm against the SI ISA documentation.
#define RSRC_DATA_FORMAT 0xf00000000000
|
||||||
|

||||||
|
/// Custom lowering for ISD::STORE (dispatched from LowerOperation).
///
/// Stores whose address space is not AMDGPUAS::GLOBAL_ADDRESS are not handled
/// here and yield an empty SDValue. For global stores an
/// AMDGPUISD::BUFFER_STORE node is built from the chain, the stored value, an
/// i128 constant pair and the address operand.
///
/// \param Op  The store node; operands 0, 1 and 2 are read as chain, value
///            and address.
/// \param DAG The SelectionDAG used to create the new nodes.
/// \returns A merge of the BUFFER_STORE node and the incoming chain, or an
///          empty SDValue for non-global stores.
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
|
||||||
|
SDValue Chain = Op.getOperand(0);
|
||||||
|
SDValue Value = Op.getOperand(1);
|
||||||
|
SDValue VirtualAddress = Op.getOperand(2);
|
||||||
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|

||||||
|
// Only global-address-space stores are turned into buffer stores.
// NOTE(review): the empty SDValue presumably makes the caller fall back to
// its default handling -- confirm.
if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) {
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|

||||||
|
// Build the i128 resource operand from two i64 constants: 0 and
// RSRC_DATA_FORMAT. The store address is passed separately as
// VirtualAddress rather than being folded into this value.
SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
|
||||||
|
DAG.getConstant(0, MVT::i64),
|
||||||
|
DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64));
|
||||||
|

||||||
|
SDValue Ops[2];
|
||||||
|
// The BUFFER_STORE node produces only a chain (MVT::Other).
Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
|
||||||
|
Value, SrcSrc, VirtualAddress);
|
||||||
|
// The second merged value is the incoming chain, not the new node's chain.
// NOTE(review): a store has a single result, so this extra value looks
// unused by callers -- confirm before relying on it.
Ops[1] = Chain;
|
||||||
|

||||||
|
return DAG.getMergeValues(Ops, 2, DL);
|
||||||
|

||||||
|
}
|
||||||
|
|
||||||
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue LHS = Op.getOperand(0);
|
SDValue LHS = Op.getOperand(0);
|
||||||
SDValue RHS = Op.getOperand(1);
|
SDValue RHS = Op.getOperand(1);
|
||||||
|
|
|
@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||||
const SIInstrInfo * TII;
|
const SIInstrInfo * TII;
|
||||||
const TargetRegisterInfo * TRI;
|
const TargetRegisterInfo * TRI;
|
||||||
|
|
||||||
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{
|
||||||
return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
|
return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
// Selection-DAG node definition for AMDGPUISD::BUFFER_STORE.
// The type profile declares no results and three operands, with a pointer
// type constraint on operand 1 and an integer constraint on operand 2.
// The node carries a chain and is marked as possibly storing to memory.
def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE",
|
||||||
|
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
|
||||||
|
[SDNPHasChain, SDNPMayStore]>;
|
||||||
|
|
||||||
def IMM8bitDWORD : ImmLeaf <
|
def IMM8bitDWORD : ImmLeaf <
|
||||||
i32, [{
|
i32, [{
|
||||||
return (Imm & ~0x3FC) == 0;
|
return (Imm & ~0x3FC) == 0;
|
||||||
|
@ -296,6 +300,28 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF
|
||||||
let mayStore = 0;
|
let mayStore = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper class for MUBUF store instructions.
//
// Parameters:
//   op         - 7-bit opcode forwarded to MUBUF.
//   name       - mnemonic; the operands are appended as
//                " $vdata, $srsrc + $vaddr".
//   vdataClass - register class of the value being stored.
//   VT         - value type of the stored data in the selection pattern.
//
// The instruction has no outputs and takes the data register, an SReg_128
// resource operand and a VReg_64 address operand. It is selected from an
// SIbuffer_store node whose operands have types VT, i128 and i64.
class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
|
||||||
|
ValueType VT> :
|
||||||
|
MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr),
|
||||||
|
name#" $vdata, $srsrc + $vaddr",
|
||||||
|
[(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc),
|
||||||
|
(i64 VReg_64:$vaddr))]> {
|
||||||
|

||||||
|
// A store: writes memory and never reads it.
let mayLoad = 0;
|
||||||
|
let mayStore = 1;
|
||||||
|

||||||
|
// Encoding: every field below is fixed for all stores built from this
// helper; addr64 is the only one of these bits that is set.
|
||||||
|
let offset = 0;
|
||||||
|
let offen = 0;
|
||||||
|
let idxen = 0;
|
||||||
|
let glc = 0;
|
||||||
|
// NOTE(review): presumably makes the hardware take the address from the
// 64-bit $vaddr operand -- confirm against the SI ISA documentation.
let addr64 = 1;
|
||||||
|
let lds = 0;
|
||||||
|
let slc = 0;
|
||||||
|
let tfe = 0;
|
||||||
|
let soffset = 128; // ZERO
|
||||||
|
}
|
||||||
|
|
||||||
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
||||||
op,
|
op,
|
||||||
(outs regClass:$dst),
|
(outs regClass:$dst),
|
||||||
|
|
|
@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2",
|
||||||
def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
|
def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
|
||||||
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
|
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
|
||||||
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
|
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
|
||||||
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
|
|
||||||
//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
|
// Buffer store of an i32 value held in a VReg_32 register (opcode 0x1c).
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
|
||||||
|
0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
|
||||||
|
>;
|
||||||
|

||||||
|
// Buffer store of an i64 value held in a VReg_64 register (opcode 0x1d).
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
|
||||||
|
0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
|
||||||
|
>;
|
||||||
//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
|
//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
|
||||||
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
|
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
|
||||||
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
|
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
|
||||||
|
|
|
@ -151,7 +151,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
|
||||||
(add SGPR_64, VCCReg, EXECReg)
|
(add SGPR_64, VCCReg, EXECReg)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
|
def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
|
||||||
|
|
||||||
def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
|
def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||||
|
|
||||||
; XXX: Enable once SI supports buffer stores
|
|
||||||
; XFAIL: *
|
|
||||||
|
|
||||||
; Use a 64-bit value with lo bits that can be represented as an inline constant
|
; Use a 64-bit value with lo bits that can be represented as an inline constant
|
||||||
; CHECK: @i64_imm_inline_lo
|
; CHECK: @i64_imm_inline_lo
|
||||||
; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5
|
; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
|
||||||
|
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
|
||||||
|

||||||
|
; A float store to addrspace(1) must select a RAT write on redwood and a
; buffer store on verde. Each RUN line only honours its own check prefix, so
; the function name is matched once per prefix.
; EG-CHECK: @store_float
|
||||||
|
; SI-CHECK: @store_float
|
||||||
|
; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1
|
||||||
|
; SI-CHECK: BUFFER_STORE_DWORD
|
||||||
|

||||||
|
define void @store_float(float addrspace(1)* %out, float %in) {
|
||||||
|
store float %in, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue