[TargetLowering] findOptimalMemOpLowering. NFCI.
This was a local static function in SelectionDAG, which I've promoted to TargetLowering so that I can reuse it to estimate the cost of a memory operation in D59787. Differential Revision: https://reviews.llvm.org/D59766 llvm-svn: 359543
This commit is contained in:
parent
59a4c0481a
commit
0ed4619679
|
@ -2934,6 +2934,20 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/// Determines the optimal series of memory ops to replace the memset / memcpy.
|
||||
/// Return true if the number of memory ops is below the threshold (Limit).
|
||||
/// It returns the types of the sequence of memory ops to perform
|
||||
/// memset / memcpy by reference.
|
||||
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
unsigned Limit, uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool IsMemset,
|
||||
bool ZeroMemset,
|
||||
bool MemcpyStrSrc,
|
||||
bool AllowOverlap,
|
||||
unsigned DstAS, unsigned SrcAS,
|
||||
const AttributeList &FuncAttributes) const;
|
||||
|
||||
/// Check to see if the specified operand of the specified instruction is a
|
||||
/// constant integer. If so, check to see if there are any bits set in the
|
||||
/// constant that are not demanded. If so, shrink the constant and return
|
||||
|
|
|
@ -5563,111 +5563,6 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
|
|||
SrcDelta + G->getOffset());
|
||||
}
|
||||
|
||||
/// Determines the optimal series of memory ops to replace the memset / memcpy.
|
||||
/// Return true if the number of memory ops is below the threshold (Limit).
|
||||
/// It returns the types of the sequence of memory ops to perform
|
||||
/// memset / memcpy by reference.
|
||||
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
unsigned Limit, uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool IsMemset,
|
||||
bool ZeroMemset,
|
||||
bool MemcpyStrSrc,
|
||||
bool AllowOverlap,
|
||||
unsigned DstAS, unsigned SrcAS,
|
||||
SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
|
||||
"Expecting memcpy / memset source to meet alignment requirement!");
|
||||
// If 'SrcAlign' is zero, that means the memory operation does not need to
|
||||
// load the value, i.e. memset or memcpy from constant string. Otherwise,
|
||||
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
|
||||
// is the specified alignment of the memory operation. If it is zero, that
|
||||
// means it's possible to change the alignment of the destination.
|
||||
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
|
||||
// not need to be loaded.
|
||||
const Function &F = DAG.getMachineFunction().getFunction();
|
||||
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
|
||||
IsMemset, ZeroMemset, MemcpyStrSrc,
|
||||
F.getAttributes());
|
||||
|
||||
if (VT == MVT::Other) {
|
||||
// Use the largest integer type whose alignment constraints are satisfied.
|
||||
// We only need to check DstAlign here as SrcAlign is always greater or
|
||||
// equal to DstAlign (or zero).
|
||||
VT = MVT::i64;
|
||||
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
|
||||
!TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
|
||||
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
|
||||
assert(VT.isInteger());
|
||||
|
||||
// Find the largest legal integer type.
|
||||
MVT LVT = MVT::i64;
|
||||
while (!TLI.isTypeLegal(LVT))
|
||||
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
|
||||
assert(LVT.isInteger());
|
||||
|
||||
// If the type we've chosen is larger than the largest legal integer type
|
||||
// then use that instead.
|
||||
if (VT.bitsGT(LVT))
|
||||
VT = LVT;
|
||||
}
|
||||
|
||||
unsigned NumMemOps = 0;
|
||||
while (Size != 0) {
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
while (VTSize > Size) {
|
||||
// For now, only use non-vector load / store's for the left-over pieces.
|
||||
EVT NewVT = VT;
|
||||
unsigned NewVTSize;
|
||||
|
||||
bool Found = false;
|
||||
if (VT.isVector() || VT.isFloatingPoint()) {
|
||||
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
|
||||
if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
|
||||
TLI.isSafeMemOpType(NewVT.getSimpleVT()))
|
||||
Found = true;
|
||||
else if (NewVT == MVT::i64 &&
|
||||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
|
||||
TLI.isSafeMemOpType(MVT::f64)) {
|
||||
// i64 is usually not legal on 32-bit targets, but f64 may be.
|
||||
NewVT = MVT::f64;
|
||||
Found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Found) {
|
||||
do {
|
||||
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
|
||||
if (NewVT == MVT::i8)
|
||||
break;
|
||||
} while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
|
||||
}
|
||||
NewVTSize = NewVT.getSizeInBits() / 8;
|
||||
|
||||
// If the new VT cannot cover all of the remaining bits, then consider
|
||||
// issuing a (or a pair of) unaligned and overlapping load / store.
|
||||
bool Fast;
|
||||
if (NumMemOps && AllowOverlap && NewVTSize < Size &&
|
||||
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
|
||||
Fast)
|
||||
VTSize = Size;
|
||||
else {
|
||||
VT = NewVT;
|
||||
VTSize = NewVTSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (++NumMemOps > Limit)
|
||||
return false;
|
||||
|
||||
MemOps.push_back(VT);
|
||||
Size -= VTSize;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
|
||||
// On Darwin, -Os means optimize for size without hurting performance, so
|
||||
// only really optimize for size when -Oz (MinSize) is used.
|
||||
|
@ -5734,13 +5629,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
|||
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
(isZeroConstant ? 0 : SrcAlign),
|
||||
false, false, CopyFromConstant, true,
|
||||
DstPtrInfo.getAddrSpace(),
|
||||
SrcPtrInfo.getAddrSpace(),
|
||||
DAG, TLI))
|
||||
MF.getFunction().getAttributes()))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -5915,12 +5810,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
|||
SrcAlign = Align;
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align), SrcAlign,
|
||||
false, false, false, false,
|
||||
DstPtrInfo.getAddrSpace(),
|
||||
SrcPtrInfo.getAddrSpace(),
|
||||
DAG, TLI))
|
||||
MF.getFunction().getAttributes()))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -6015,11 +5910,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
|
|||
DstAlignCanChange = true;
|
||||
bool IsZeroVal =
|
||||
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
|
||||
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
|
||||
if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
|
||||
Size, (DstAlignCanChange ? 0 : Align), 0,
|
||||
true, IsZeroVal, false, true,
|
||||
DstPtrInfo.getAddrSpace(), ~0u,
|
||||
DAG, TLI))
|
||||
MF.getFunction().getAttributes()))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
|
|
@ -153,6 +153,107 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
|
|||
return LowerCallTo(CLI);
|
||||
}
|
||||
|
||||
bool
|
||||
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
unsigned Limit, uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool IsMemset,
|
||||
bool ZeroMemset,
|
||||
bool MemcpyStrSrc,
|
||||
bool AllowOverlap,
|
||||
unsigned DstAS, unsigned SrcAS,
|
||||
const AttributeList &FuncAttributes) const {
|
||||
// If 'SrcAlign' is zero, that means the memory operation does not need to
|
||||
// load the value, i.e. memset or memcpy from constant string. Otherwise,
|
||||
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
|
||||
// is the specified alignment of the memory operation. If it is zero, that
|
||||
// means it's possible to change the alignment of the destination.
|
||||
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
|
||||
// not need to be loaded.
|
||||
if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
|
||||
return false;
|
||||
|
||||
EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
|
||||
IsMemset, ZeroMemset, MemcpyStrSrc,
|
||||
FuncAttributes);
|
||||
|
||||
if (VT == MVT::Other) {
|
||||
// Use the largest integer type whose alignment constraints are satisfied.
|
||||
// We only need to check DstAlign here as SrcAlign is always greater or
|
||||
// equal to DstAlign (or zero).
|
||||
VT = MVT::i64;
|
||||
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
|
||||
!allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
|
||||
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
|
||||
assert(VT.isInteger());
|
||||
|
||||
// Find the largest legal integer type.
|
||||
MVT LVT = MVT::i64;
|
||||
while (!isTypeLegal(LVT))
|
||||
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
|
||||
assert(LVT.isInteger());
|
||||
|
||||
// If the type we've chosen is larger than the largest legal integer type
|
||||
// then use that instead.
|
||||
if (VT.bitsGT(LVT))
|
||||
VT = LVT;
|
||||
}
|
||||
|
||||
unsigned NumMemOps = 0;
|
||||
while (Size != 0) {
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
while (VTSize > Size) {
|
||||
// For now, only use non-vector load / store's for the left-over pieces.
|
||||
EVT NewVT = VT;
|
||||
unsigned NewVTSize;
|
||||
|
||||
bool Found = false;
|
||||
if (VT.isVector() || VT.isFloatingPoint()) {
|
||||
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
|
||||
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
|
||||
isSafeMemOpType(NewVT.getSimpleVT()))
|
||||
Found = true;
|
||||
else if (NewVT == MVT::i64 &&
|
||||
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
|
||||
isSafeMemOpType(MVT::f64)) {
|
||||
// i64 is usually not legal on 32-bit targets, but f64 may be.
|
||||
NewVT = MVT::f64;
|
||||
Found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Found) {
|
||||
do {
|
||||
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
|
||||
if (NewVT == MVT::i8)
|
||||
break;
|
||||
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
|
||||
}
|
||||
NewVTSize = NewVT.getSizeInBits() / 8;
|
||||
|
||||
// If the new VT cannot cover all of the remaining bits, then consider
|
||||
// issuing a (or a pair of) unaligned and overlapping load / store.
|
||||
bool Fast;
|
||||
if (NumMemOps && AllowOverlap && NewVTSize < Size &&
|
||||
allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
|
||||
Fast)
|
||||
VTSize = Size;
|
||||
else {
|
||||
VT = NewVT;
|
||||
VTSize = NewVTSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (++NumMemOps > Limit)
|
||||
return false;
|
||||
|
||||
MemOps.push_back(VT);
|
||||
Size -= VTSize;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Soften the operands of a comparison. This code is shared among BR_CC,
|
||||
/// SELECT_CC, and SETCC handlers.
|
||||
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
|
||||
|
|
Loading…
Reference in New Issue