[TargetLowering] findOptimalMemOpLowering. NFCI.

This was a local static function in SelectionDAG, which I've promoted to
TargetLowering so that I can reuse it to estimate the cost of a memory
operation in D59787.
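
To sketch the intended reuse (illustrative only; the helper name and
parameters below are mine, not part of this patch or of D59787): a cost
model can now ask the target how a memory operation would be split up and
use the length of the returned sequence as a proxy for its cost.

  // Hypothetical caller, e.g. in a target's inline-expansion cost check.
  static unsigned estimateMemOpCost(const TargetLowering &TLI,
                                    const Function &F, uint64_t Size,
                                    unsigned DstAlign, unsigned SrcAlign,
                                    unsigned Limit) {
    std::vector<EVT> MemOps;
    if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, DstAlign, SrcAlign,
                                      /*IsMemset=*/false, /*ZeroMemset=*/false,
                                      /*MemcpyStrSrc=*/false,
                                      /*AllowOverlap=*/true,
                                      /*DstAS=*/0, /*SrcAS=*/0,
                                      F.getAttributes()))
      return ~0U; // Over the limit; not profitable to expand inline.
    return MemOps.size(); // One load/store (pair) per entry.
  }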

Differential Revision: https://reviews.llvm.org/D59766

llvm-svn: 359543
Sjoerd Meijer 2019-04-30 10:09:15 +00:00
parent 59a4c0481a
commit 0ed4619679
3 changed files with 133 additions and 123 deletions

llvm/include/llvm/CodeGen/TargetLowering.h

@@ -2934,6 +2934,20 @@ public:
     }
   };
 
+  /// Determines the optimal series of memory ops to replace the memset / memcpy.
+  /// Return true if the number of memory ops is below the threshold (Limit).
+  /// It returns the types of the sequence of memory ops to perform
+  /// memset / memcpy by reference.
+  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                unsigned Limit, uint64_t Size,
+                                unsigned DstAlign, unsigned SrcAlign,
+                                bool IsMemset,
+                                bool ZeroMemset,
+                                bool MemcpyStrSrc,
+                                bool AllowOverlap,
+                                unsigned DstAS, unsigned SrcAS,
+                                const AttributeList &FuncAttributes) const;
+
   /// Check to see if the specified operand of the specified instruction is a
   /// constant integer. If so, check to see if there are any bits set in the
   /// constant that are not demanded. If so, shrink the constant and return
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -5563,111 +5563,6 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
                                   SrcDelta + G->getOffset());
 }
 
-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                     unsigned Limit, uint64_t Size,
-                                     unsigned DstAlign, unsigned SrcAlign,
-                                     bool IsMemset,
-                                     bool ZeroMemset,
-                                     bool MemcpyStrSrc,
-                                     bool AllowOverlap,
-                                     unsigned DstAS, unsigned SrcAS,
-                                     SelectionDAG &DAG,
-                                     const TargetLowering &TLI) {
-  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
-         "Expecting memcpy / memset source to meet alignment requirement!");
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  const Function &F = DAG.getMachineFunction().getFunction();
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
-                                   IsMemset, ZeroMemset, MemcpyStrSrc,
-                                   F.getAttributes());
-
-  if (VT == MVT::Other) {
-    // Use the largest integer type whose alignment constraints are satisfied.
-    // We only need to check DstAlign here as SrcAlign is always greater or
-    // equal to DstAlign (or zero).
-    VT = MVT::i64;
-    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
-           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-    assert(VT.isInteger());
-
-    // Find the largest legal integer type.
-    MVT LVT = MVT::i64;
-    while (!TLI.isTypeLegal(LVT))
-      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
-    assert(LVT.isInteger());
-
-    // If the type we've chosen is larger than the largest legal integer type
-    // then use that instead.
-    if (VT.bitsGT(LVT))
-      VT = LVT;
-  }
-
-  unsigned NumMemOps = 0;
-  while (Size != 0) {
-    unsigned VTSize = VT.getSizeInBits() / 8;
-    while (VTSize > Size) {
-      // For now, only use non-vector load / store's for the left-over pieces.
-      EVT NewVT = VT;
-      unsigned NewVTSize;
-
-      bool Found = false;
-      if (VT.isVector() || VT.isFloatingPoint()) {
-        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
-            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
-          Found = true;
-        else if (NewVT == MVT::i64 &&
-                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
-                 TLI.isSafeMemOpType(MVT::f64)) {
-          // i64 is usually not legal on 32-bit targets, but f64 may be.
-          NewVT = MVT::f64;
-          Found = true;
-        }
-      }
-
-      if (!Found) {
-        do {
-          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
-          if (NewVT == MVT::i8)
-            break;
-        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
-      }
-      NewVTSize = NewVT.getSizeInBits() / 8;
-
-      // If the new VT cannot cover all of the remaining bits, then consider
-      // issuing a (or a pair of) unaligned and overlapping load / store.
-      bool Fast;
-      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
-          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
-          Fast)
-        VTSize = Size;
-      else {
-        VT = NewVT;
-        VTSize = NewVTSize;
-      }
-    }
-
-    if (++NumMemOps > Limit)
-      return false;
-
-    MemOps.push_back(VT);
-    Size -= VTSize;
-  }
-
-  return true;
-}
-
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
@@ -5734,13 +5629,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
 
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align),
-                                (isZeroConstant ? 0 : SrcAlign),
-                                false, false, CopyFromConstant, true,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
+                                    (DstAlignCanChange ? 0 : Align),
+                                    (isZeroConstant ? 0 : SrcAlign),
+                                    false, false, CopyFromConstant, true,
+                                    DstPtrInfo.getAddrSpace(),
+                                    SrcPtrInfo.getAddrSpace(),
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -5915,12 +5810,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
     SrcAlign = Align;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
 
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align), SrcAlign,
-                                false, false, false, false,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
+                                    (DstAlignCanChange ? 0 : Align), SrcAlign,
+                                    false, false, false, false,
+                                    DstPtrInfo.getAddrSpace(),
+                                    SrcPtrInfo.getAddrSpace(),
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -6015,11 +5910,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
     DstAlignCanChange = true;
   bool IsZeroVal =
       isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
-  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
-                                Size, (DstAlignCanChange ? 0 : Align), 0,
-                                true, IsZeroVal, false, true,
-                                DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+                                    Size, (DstAlignCanChange ? 0 : Align), 0,
+                                    true, IsZeroVal, false, true,
+                                    DstPtrInfo.getAddrSpace(), ~0u,
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -153,6 +153,107 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
   return LowerCallTo(CLI);
 }
 
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                         unsigned Limit, uint64_t Size,
+                                         unsigned DstAlign, unsigned SrcAlign,
+                                         bool IsMemset,
+                                         bool ZeroMemset,
+                                         bool MemcpyStrSrc,
+                                         bool AllowOverlap,
+                                         unsigned DstAS, unsigned SrcAS,
+                                         const AttributeList &FuncAttributes) const {
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
+  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+    return false;
+
+  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                               IsMemset, ZeroMemset, MemcpyStrSrc,
+                               FuncAttributes);
+
+  if (VT == MVT::Other) {
+    // Use the largest integer type whose alignment constraints are satisfied.
+    // We only need to check DstAlign here as SrcAlign is always greater or
+    // equal to DstAlign (or zero).
+    VT = MVT::i64;
+    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+    assert(VT.isInteger());
+
+    // Find the largest legal integer type.
+    MVT LVT = MVT::i64;
+    while (!isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    // If the type we've chosen is larger than the largest legal integer type
+    // then use that instead.
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector load / store's for the left-over pieces.
+      EVT NewVT = VT;
+      unsigned NewVTSize;
+
+      bool Found = false;
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+            isSafeMemOpType(NewVT.getSimpleVT()))
+          Found = true;
+        else if (NewVT == MVT::i64 &&
+                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+                 isSafeMemOpType(MVT::f64)) {
+          // i64 is usually not legal on 32-bit targets, but f64 may be.
+          NewVT = MVT::f64;
+          Found = true;
+        }
+      }
+
+      if (!Found) {
+        do {
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+          if (NewVT == MVT::i8)
+            break;
+        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+      }
+      NewVTSize = NewVT.getSizeInBits() / 8;
+
+      // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing a (or a pair of) unaligned and overlapping load / store.
+      bool Fast;
+      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+          Fast)
+        VTSize = Size;
+      else {
+        VT = NewVT;
+        VTSize = NewVTSize;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
 /// Soften the operands of a comparison. This code is shared among BR_CC,
 /// SELECT_CC, and SETCC handlers.
 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
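
As a self-contained illustration of the greedy loop above, the following
simplified model covers Size bytes with progressively narrower power-of-two
chunks, mirroring the i64 -> i32 -> i16 -> i8 shrinking but omitting the
target-legality, vector/float fallback, and overlap logic:

  #include <cstdint>
  #include <vector>

  // Simplified sketch, not a drop-in replacement for the real function.
  static std::vector<unsigned> splitMemOp(uint64_t Size, unsigned MaxBytes) {
    std::vector<unsigned> Chunks;
    unsigned Bytes = MaxBytes;
    while (Size != 0) {
      while (Bytes > Size)
        Bytes /= 2; // Step down to the next narrower type, as above.
      Chunks.push_back(Bytes);
      Size -= Bytes;
    }
    return Chunks;
  }

  // splitMemOp(15, 8) == {8, 4, 2, 1}: the same shape as the MemOps the
  // real function produces for a 15-byte copy when overlap is not allowed.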