R600: Add support for vector local memory loads

llvm-svn: 189226
This commit is contained in:
Tom Stellard 2013-08-26 15:06:04 +00:00
parent c6f4a29ed5
commit 35bb18c2a7
5 changed files with 64 additions and 0 deletions

View File

@ -424,6 +424,29 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
return Op;
}
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
SDLoc SL(Op);
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
Load->getChain(), Ptr,
MachinePointerInfo(Load->getMemOperand()->getValue()),
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
Load->getAlignment()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
Loads.size());
}
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);

View File

@ -50,6 +50,8 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
/// \brief Split a vector load into multiple scalar loads.
SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;

View File

@ -1155,6 +1155,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
Chain
};
return DAG.getMergeValues(MergedValues, 2, DL);
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1) {
SDValue Result;

View File

@ -66,6 +66,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BITCAST, MVT::i128, Legal);
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@ -368,6 +372,19 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
Op.getValueType().isVector()) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
Load->getChain()
};
return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
} else {
return SDValue();
}
}
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);

View File

@ -516,3 +516,17 @@ entry:
store float %0, float addrspace(1)* %out
ret void
}
; load a v2f32 value from the local address space
; R600-CHECK: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK: @load_v2f32_local
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
%0 = load <2 x float> addrspace(3)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}