In some cases, due to scheduling constraints we copy the EFLAGS.

The only way to read EFLAGS is with a push and a pop. If we don't
adjust the stack pointer, the push overwrites the first frame index.
That is not something we want, so we have to check whether our
machine function copies the flags. If it does, then we have to
emit the prologue that adjusts the stack.

rdar://12896831

llvm-svn: 170961
This commit is contained in:
Nadav Rotem 2012-12-21 23:48:49 +00:00
parent 6ac2fc4976
commit d5aae980cb
3 changed files with 58 additions and 1 deletions

View File

@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
return CompactUnwindEncoding;
}
/// colobbersTheStack - Report whether the function contains a COPY that
/// involves EFLAGS. Lowering such a COPY goes through the stack (push/pop),
/// so unless the stack pointer is adjusted the push would clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
///
/// \param MF the machine function whose register info is inspected.
/// \returns true as soon as one EFLAGS reference is found on a COPY
///          instruction, false when none is.
static bool colobbersTheStack(MachineFunction &MF) {
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Walk every entry the register info tracks for EFLAGS and stop at the
  // first one that sits on a COPY.
  MachineRegisterInfo::reg_iterator It = RegInfo.reg_begin(X86::EFLAGS);
  MachineRegisterInfo::reg_iterator End = RegInfo.reg_end();
  while (It != End) {
    if (It->isCopy())
      return true;
    ++It;
  }

  return false;
}
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@ -673,12 +689,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
!colobbersTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;

View File

@ -2892,6 +2892,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Moving EFLAGS to / from another register requires a push and a pop.
// Notice that we have to adjust the stack if we don't want to clobber the
// first frame index. See X86FrameLowering.cpp - colobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));

View File

@ -0,0 +1,37 @@
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.0"
; In the code below we need to copy the EFLAGS because of scheduling constraints.
; When copying the EFLAGS we need to write to the stack with push/pop. This forces
; us to emit the prolog.
; CHECK: main
; CHECK: subq{{.*}}rsp
; CHECK: ret
; @main - Reduced reproducer. Three i32 stack slots are allocated; the loop in
; %bb4 decrements the value in %tmp2 until it reaches zero, then %bb11 returns
; the value stored in %tmp.
define i32 @main(i32 %arg, i8** %arg1) nounwind {
bb:
%tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
%tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
%tmp3 = alloca i32 ; [#uses=1 type=i32*]
store i32 1, i32* %tmp, align 4
store i32 1, i32* %tmp2, align 4
br label %bb4
; Loop body: the compare result %tmp8 is produced before the second add
; (%tmp10) but is only consumed by the branch after it.
; NOTE(review): presumably this ordering is what makes the backend copy
; EFLAGS around the second add - confirm against the generated code.
bb4: ; preds = %bb4, %bb
%tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32]
%tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
store i32 %tmp7, i32* %tmp2, align 4
%tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
%tmp9 = load i32* %tmp ; [#uses=1 type=i32]
%tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
store i32 %tmp10, i32* %tmp3
br i1 %tmp8, label %bb11, label %bb4
; Exit block: reload %tmp (never modified inside the loop) and return it.
bb11: ; preds = %bb4
%tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32]
ret i32 %tmp12
}