LegalizeDAG: Don't replace vector load with integer unless legal

On AMDGPU we want to be able to promote i64/f64 loads to v2i32.
If the access is unaligned, this would conclude that since i64 is legal,
it would convert it back to i64 and there is an endless legalization
loop.

Extract the logic for scalarizing the load into a new TargetLowering
function, where this can also replace the custom function AMDGPU
has for this.

llvm-svn: 264927
This commit is contained in:
Matt Arsenault 2016-03-30 21:15:10 +00:00
parent 5d518386b6
commit a4b1b6ea05
4 changed files with 76 additions and 28 deletions

View File

@ -2852,6 +2852,11 @@ public:
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
/// \returns MERGE_VALUEs of the scalar loads with their chains.
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const;
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//

View File

@ -455,6 +455,14 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
if (!TLI.isOperationLegalOrCustom(ISD::LOAD, intVT)) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
ValResult = Scalarized.getValue(0);
ChainResult = Scalarized.getValue(1);
return;
}
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,

View File

@ -492,21 +492,26 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDLoc dl(Op);
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
EVT SrcVT = LD->getMemoryVT();
EVT SrcEltVT = SrcVT.getScalarType();
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
SDValue NewChain;
SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
SDLoc dl(Op);
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
EVT DstEltVT = LD->getValueType(0).getScalarType();
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
ISD::LoadExtType ExtType = LD->getExtensionType();
// When elements in a vector is not byte-addressable, we cannot directly
// load each element by advancing pointer, which could only address bytes.
// Instead, we load all significant words, mask bits off, and concatenate
@ -613,29 +618,17 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
Vals.push_back(Lo);
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
Op.getNode()->getValueType(0), Vals);
} else {
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
for (unsigned Idx=0; Idx<NumElem; Idx++) {
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
Op.getNode()->getValueType(0).getScalarType(),
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, dl, BasePTR.getValueType()));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
NewChain = Scalarized.getValue(1);
Value = Scalarized.getValue(0);
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
Op.getNode()->getValueType(0), Vals);
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);

View File

@ -3108,6 +3108,48 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
return true;
}
SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = LD->getValueType(0).getScalarType();
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
EVT PtrVT = BasePTR.getValueType();
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue ScalarLoad = DAG.getExtLoad(
ExtType, SL, DstEltVT,
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcEltVT,
LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
DAG.getConstant(Stride, SL, PtrVT));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
return DAG.getMergeValues({ Value, NewChain }, SL);
}
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//