When passing a huge parameter using the byval mechanism, a long

sequence of loads and stores was being generated to perform the
copy on the x86 targets if the parameter was less than 4 byte
aligned, causing llc to use up vast amounts of memory and time.
Use a "rep movs" form instead.  PR7170.

llvm-svn: 118260
This commit is contained in:
Duncan Sands 2010-11-04 21:16:46 +00:00
parent 112fc6cdfb
commit 98512315f7
2 changed files with 28 additions and 7 deletions

View File

@ -187,19 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
return SDValue();
/// If not DWORD aligned, call the library.
if ((Align & 3) != 0)
/// If not DWORD aligned, it is more efficient to call the library. However
/// if calling the library is not allowed (AlwaysInline), then soldier on as
/// the code generated here is better than the long load-store sequence we
/// would otherwise get.
if (!AlwaysInline && (Align & 3) != 0)
return SDValue();
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256 ||
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
// DWORD aligned
EVT AVT = MVT::i32;
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
AVT = MVT::i64;
MVT AVT;
if (Align & 1)
AVT = MVT::i8;
else if (Align & 2)
AVT = MVT::i16;
else if (Align & 4)
// DWORD aligned
AVT = MVT::i32;
else
// QWORD aligned
AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;

View File

@ -0,0 +1,11 @@
; RUN: llc < %s
; PR7170
%big = type [131072 x i8]
declare void @foo(%big* byval align 1)
define void @bar(%big* byval align 1 %x) {
call void @foo(%big* byval align 1 %x)
ret void
}