[X86] Elide references to _chkstk for dynamic allocas
The _chkstk function is called by the compiler to probe the stack in an order consistent with Windows' expectations. However, it is possible to elide the call to _chkstk and manually adjust the stack pointer if we can prove that the allocation is fixed size and smaller than the probe size. This shrinks chrome.dll, chrome_child.dll and chrome.exe by a cumulative ~133 KB. Differential Revision: http://reviews.llvm.org/D17679 llvm-svn: 262370
This commit is contained in:
parent
2abc587c1e
commit
791b88b6da
|
@ -16366,9 +16366,8 @@ SDValue
|
|||
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
const Function *F = MF.getFunction();
|
||||
bool SplitStack = MF.shouldSplitStack();
|
||||
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
|
||||
SplitStack;
|
||||
SDLoc dl(Op);
|
||||
|
||||
// Get the inputs.
|
||||
|
@ -16382,21 +16381,45 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
// pointer when other instructions are using the stack.
|
||||
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
|
||||
|
||||
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
bool Is64Bit = Subtarget.is64Bit();
|
||||
MVT SPTy = getPointerTy(DAG.getDataLayout());
|
||||
|
||||
bool CheckStack = SplitStack;
|
||||
if (!CheckStack && Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) {
|
||||
// The Windows ABI requires us to probe the stack for allocations beyond
|
||||
// the probe size.
|
||||
if (auto *SizeC = dyn_cast<ConstantSDNode>(Size)) {
|
||||
// Try to elide the probe if we can prove that this dynamic allocation is
|
||||
// smaller than the probe size.
|
||||
unsigned StackProbeSize = 4096;
|
||||
if (F->hasFnAttribute("stack-probe-size"))
|
||||
F->getFnAttribute("stack-probe-size")
|
||||
.getValueAsString()
|
||||
.getAsInteger(0, StackProbeSize);
|
||||
unsigned AlignedAlloc = SizeC->getZExtValue();
|
||||
// Round the dynamic alloca's size up to its alignment.
|
||||
if (Align)
|
||||
AlignedAlloc = alignTo(AlignedAlloc, Align);
|
||||
|
||||
// If the aligned allocation is smaller than the probe size, then we don't
|
||||
// need to probe the stack.
|
||||
CheckStack = AlignedAlloc >= StackProbeSize;
|
||||
} else {
|
||||
// We cannot tell how big this dynamic alloca will be, probe the stack.
|
||||
CheckStack = true;
|
||||
}
|
||||
}
|
||||
|
||||
SDValue Result;
|
||||
if (!Lower) {
|
||||
if (!CheckStack) {
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
|
||||
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
|
||||
" not tell us which reg is the stack pointer!");
|
||||
EVT VT = Node->getValueType(0);
|
||||
SDValue Tmp3 = Node->getOperand(2);
|
||||
|
||||
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
|
||||
Chain = SP.getValue(1);
|
||||
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
|
||||
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
|
||||
unsigned StackAlign = TFI.getStackAlignment();
|
||||
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
|
||||
|
@ -16410,8 +16433,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
if (Is64Bit) {
|
||||
// The 64 bit implementation of segmented stacks needs to clobber both r10
|
||||
// and r11. This makes it impossible to use it along with nested parameters.
|
||||
const Function *F = MF.getFunction();
|
||||
|
||||
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
|
||||
I != E; ++I)
|
||||
if (I->hasNestAttr())
|
||||
|
@ -16434,7 +16455,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||
|
||||
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
|
||||
|
||||
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
unsigned SPReg = RegInfo->getStackRegister();
|
||||
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
|
||||
Chain = SP.getValue(1);
|
||||
|
|
|
@ -38,8 +38,9 @@ ehcleanup: ; preds = %entry
|
|||
; CHECK: pushl %ebp
|
||||
; CHECK: movl %esp, %ebp
|
||||
; CHECK: subl ${{[0-9]+}}, %esp
|
||||
; CHECK: movl $8, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[tmp_sp1:.*]]
|
||||
; CHECK: leal -8(%[[tmp_sp1]]), %[[tmp_sp2:.*]]
|
||||
; CHECK: %[[tmp_sp2]], %esp
|
||||
; CHECK: calll "??0A@@QAE@XZ"
|
||||
; CHECK: calll "??0A@@QAE@XZ"
|
||||
; CHECK: calll _takes_two
|
||||
|
|
|
@ -15,5 +15,8 @@ define void @bar() {
|
|||
ret void
|
||||
}
|
||||
; CHECK-LABEL: _bar:
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %ebp
|
||||
; CHECK: movl %esp, %[[sp_tmp:.*]]
|
||||
; CHECK: addl $-4, %[[sp_tmp]]
|
||||
; CHECK: movl %[[sp_tmp]], %esp
|
||||
; CHECK: retl
|
||||
|
|
|
@ -10,13 +10,14 @@ declare void @Foo_ctor(%Foo* %this)
|
|||
|
||||
define void @g() {
|
||||
entry:
|
||||
; CHECK: movl %esp, %ebp
|
||||
%args = alloca inalloca %frame
|
||||
%c = getelementptr %frame, %frame* %args, i32 0, i32 2
|
||||
; CHECK: movl $20, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp,
|
||||
; CHECK: movl %esp, %[[tmp_sp1:.*]]
|
||||
; CHECK: leal -20(%[[tmp_sp1]]), %[[tmp_sp2:.*]]
|
||||
; CHECK: movl %[[tmp_sp2]], %esp
|
||||
call void @Foo_ctor(%Foo* %c)
|
||||
; CHECK: leal 12(%{{.*}}),
|
||||
; CHECK: leal -8(%[[tmp_sp1]]),
|
||||
; CHECK-NEXT: pushl
|
||||
; CHECK-NEXT: calll _Foo_ctor
|
||||
; CHECK: addl $4, %esp
|
||||
|
|
|
@ -12,6 +12,7 @@ declare void @plus(%Iter* sret, %Iter*, i32)
|
|||
declare void @reverse(%frame.reverse* inalloca align 4)
|
||||
|
||||
define i32 @main() personality i32 (...)* @pers {
|
||||
; CHECK: movl %esp, %ebp
|
||||
%temp.lvalue = alloca %Iter
|
||||
br label %blah
|
||||
|
||||
|
@ -21,9 +22,10 @@ blah:
|
|||
%beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0
|
||||
%end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1
|
||||
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[beg:[^ ]*]]
|
||||
; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
|
||||
; CHECK: movl %esp, %[[end:.*]]
|
||||
; CHECK: leal -24(%[[end]]), %[[beg:.*]]
|
||||
; CHECK: movl %[[beg]], %esp
|
||||
; CHECK: addl $-12, %[[end]]
|
||||
|
||||
call void @begin(%Iter* sret %temp.lvalue)
|
||||
; CHECK: calll _begin
|
||||
|
|
|
@ -7,16 +7,16 @@ declare x86_stdcallcc void @i(i32 %a)
|
|||
|
||||
define void @g() {
|
||||
; CHECK-LABEL: _g:
|
||||
; CHECK: movl %esp, %ebp
|
||||
%b = alloca inalloca %Foo
|
||||
; CHECK: movl $8, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[tmp_sp:.*]]
|
||||
; CHECK: leal -8(%[[tmp_sp]]), %esp
|
||||
%f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
|
||||
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
|
||||
store i32 13, i32* %f1
|
||||
store i32 42, i32* %f2
|
||||
; CHECK: movl %esp, %eax
|
||||
; CHECK: movl $13, (%eax)
|
||||
; CHECK: movl $42, 4(%eax)
|
||||
; CHECK: movl $13, -8(%[[tmp_sp]])
|
||||
; CHECK: movl $42, -4(%[[tmp_sp]])
|
||||
call x86_stdcallcc void @f(%Foo* inalloca %b)
|
||||
; CHECK: calll _f@8
|
||||
; CHECK-NOT: %esp
|
||||
|
|
|
@ -7,16 +7,16 @@ declare void @f(%Foo* inalloca %b)
|
|||
define void @a() {
|
||||
; CHECK-LABEL: _a:
|
||||
entry:
|
||||
; CHECK: movl %esp, %ebp
|
||||
%b = alloca inalloca %Foo
|
||||
; CHECK: movl $8, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[tmp_sp:.*]]
|
||||
; CHECK: leal -8(%[[tmp_sp]]), %esp
|
||||
%f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
|
||||
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
|
||||
store i32 13, i32* %f1
|
||||
store i32 42, i32* %f2
|
||||
; CHECK: movl %esp, %eax
|
||||
; CHECK: movl $13, (%eax)
|
||||
; CHECK: movl $42, 4(%eax)
|
||||
; CHECK: movl $13, -8(%[[tmp_sp]])
|
||||
; CHECK: movl $42, -4(%[[tmp_sp]])
|
||||
call void @f(%Foo* inalloca %b)
|
||||
; CHECK: calll _f
|
||||
ret void
|
||||
|
@ -27,16 +27,16 @@ declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b)
|
|||
define void @b() {
|
||||
; CHECK-LABEL: _b:
|
||||
entry:
|
||||
; CHECK: movl %esp, %ebp
|
||||
%b = alloca inalloca %Foo
|
||||
; CHECK: movl $8, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[tmp_sp:.*]]
|
||||
; CHECK: leal -8(%[[tmp_sp]]), %esp
|
||||
%f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
|
||||
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
|
||||
store i32 13, i32* %f1
|
||||
store i32 42, i32* %f2
|
||||
; CHECK: movl %esp, %eax
|
||||
; CHECK: movl $13, (%eax)
|
||||
; CHECK: movl $42, 4(%eax)
|
||||
; CHECK: movl $13, -8(%[[tmp_sp]])
|
||||
; CHECK: movl $42, -4(%[[tmp_sp]])
|
||||
call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
|
||||
; CHECK: movl $1, %eax
|
||||
; CHECK: calll _inreg_with_inalloca
|
||||
|
@ -48,16 +48,16 @@ declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b)
|
|||
define void @c() {
|
||||
; CHECK-LABEL: _c:
|
||||
entry:
|
||||
; CHECK: movl %esp, %ebp
|
||||
%b = alloca inalloca %Foo
|
||||
; CHECK: movl $8, %eax
|
||||
; CHECK: calll __chkstk
|
||||
; CHECK: movl %esp, %[[tmp_sp:.*]]
|
||||
; CHECK: leal -8(%[[tmp_sp]]), %esp
|
||||
%f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
|
||||
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
|
||||
store i32 13, i32* %f1
|
||||
store i32 42, i32* %f2
|
||||
; CHECK: movl %esp, %eax
|
||||
; CHECK-DAG: movl $13, (%eax)
|
||||
; CHECK-DAG: movl $42, 4(%eax)
|
||||
; CHECK-DAG: movl $13, -8(%[[tmp_sp]])
|
||||
; CHECK-DAG: movl $42, -4(%[[tmp_sp]])
|
||||
call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
|
||||
; CHECK-DAG: xorl %ecx, %ecx
|
||||
; CHECK: calll _thiscall_with_inalloca
|
||||
|
|
|
@ -9,7 +9,7 @@ target triple = "i686-pc-windows-msvc18.0.0"
|
|||
|
||||
%struct.S = type { [12 x i8] }
|
||||
|
||||
define x86_thiscallcc void @call_inalloca(i1 %x) {
|
||||
define x86_thiscallcc void @call_inalloca(i1 %x) "stack-probe-size"="12" {
|
||||
entry:
|
||||
%argmem = alloca inalloca <{ %struct.S }>, align 4
|
||||
%argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0
|
||||
|
|
Loading…
Reference in New Issue