[WinEH] Fix funclet prologues with stack realignment

We already had a test for this for 32-bit SEH catchpads, but those don't
actually create funclets. We had a bug that only appeared in funclet
prologues, where we would establish EBP and ESI as our FP and BP, and
then downstream prologue code would overwrite them.

While I was at it, I fixed Win64+funclets+stackrealign. This issue
doesn't come up as often there due to the ABI requring 16 byte stack
alignment, but now we can rest easy that AVX and WinEH will work well
together =P.

llvm-svn: 252210
This commit is contained in:
Reid Kleckner 2015-11-05 21:09:49 +00:00
parent 484e48e3a3
commit 6ddae31045
7 changed files with 145 additions and 46 deletions

View File

@ -299,12 +299,17 @@ const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
Asm->OutContext); Asm->OutContext);
} }
int WinException::getFrameIndexOffset(int FrameIndex) { int WinException::getFrameIndexOffset(int FrameIndex, WinEHFuncInfo &FuncInfo) {
const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
unsigned UnusedReg; unsigned UnusedReg;
if (Asm->MAI->usesWindowsCFI()) if (Asm->MAI->usesWindowsCFI())
return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg);
return TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); // For 32-bit, offsets should be relative to the end of the EH registration
// node. For 64-bit, it's relative to SP at the end of the prologue.
assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
Offset += FuncInfo.EHRegNodeEndOffset;
return Offset;
} }
namespace { namespace {
@ -613,7 +618,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
int UnwindHelpOffset = 0; int UnwindHelpOffset = 0;
if (Asm->MAI->usesWindowsCFI()) if (Asm->MAI->usesWindowsCFI())
UnwindHelpOffset = getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx); UnwindHelpOffset =
getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
MCSymbol *UnwindMapXData = nullptr; MCSymbol *UnwindMapXData = nullptr;
MCSymbol *TryBlockMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr;
@ -733,14 +739,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// emit an offset of zero, indicating that no copy will occur. // emit an offset of zero, indicating that no copy will occur.
const MCExpr *FrameAllocOffsetRef = nullptr; const MCExpr *FrameAllocOffsetRef = nullptr;
if (HT.CatchObj.FrameIndex != INT_MAX) { if (HT.CatchObj.FrameIndex != INT_MAX) {
int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex); int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
// For 32-bit, the catch object offset is relative to the end of the
// EH registration node. For 64-bit, it's relative to SP at the end of
// the prologue.
if (!shouldEmitPersonality) {
assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
Offset += FuncInfo.EHRegNodeEndOffset;
}
FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
} else { } else {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);

View File

@ -77,7 +77,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// given index. For targets using CFI (Win64, etc), this is relative to the /// given index. For targets using CFI (Win64, etc), this is relative to the
/// established SP at the end of the prologue. For targets without CFI (Win32 /// established SP at the end of the prologue. For targets without CFI (Win32
/// only), it is relative to the frame pointer. /// only), it is relative to the frame pointer.
int getFrameIndexOffset(int FrameIndex); int getFrameIndexOffset(int FrameIndex, WinEHFuncInfo &FuncInfo);
public: public:
//===--------------------------------------------------------------------===// //===--------------------------------------------------------------------===//

View File

@ -215,10 +215,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// are really data, and no instructions can live here. // are really data, and no instructions can live here.
if (BB->isEHPad()) { if (BB->isEHPad()) {
const Instruction *I = BB->getFirstNonPHI(); const Instruction *I = BB->getFirstNonPHI();
// FIXME: Don't mark SEH functions without __finally blocks as having // If this is a non-landingpad EH pad, mark this function as using
// funclets. // funclets.
if (!isa<LandingPadInst>(I)) // FIXME: SEH catchpads do not create funclets, so we could avoid setting
// this in such cases in order to improve frame layout.
if (!isa<LandingPadInst>(I)) {
MMI.setHasEHFunclets(true); MMI.setHasEHFunclets(true);
MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
}
if (isa<CatchEndPadInst>(I) || isa<CleanupEndPadInst>(I)) { if (isa<CatchEndPadInst>(I) || isa<CleanupEndPadInst>(I)) {
assert(&*BB->begin() == I && assert(&*BB->begin() == I &&
"WinEHPrepare failed to remove PHIs from imaginary BBs"); "WinEHPrepare failed to remove PHIs from imaginary BBs");

View File

@ -515,13 +515,12 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator MBBI,
DebugLoc DL, DebugLoc DL, unsigned Reg,
uint64_t MaxAlign) const { uint64_t MaxAlign) const {
uint64_t Val = -MaxAlign; uint64_t Val = -MaxAlign;
MachineInstr *MI = unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
StackPtr) .addReg(Reg)
.addReg(StackPtr)
.addImm(Val) .addImm(Val)
.setMIFlag(MachineInstr::FrameSetup); .setMIFlag(MachineInstr::FrameSetup);
@ -834,7 +833,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Don't do this for Win64, it needs to realign the stack after the prologue. // Don't do this for Win64, it needs to realign the stack after the prologue.
if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned."); assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
} }
// If there is an SUB32ri of ESP immediately before this instruction, merge // If there is an SUB32ri of ESP immediately before this instruction, merge
@ -923,11 +922,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup); .setMIFlag(MachineInstr::FrameSetup);
int SEHFrameOffset = 0; int SEHFrameOffset = 0;
unsigned SPOrEstablisher = IsFunclet ? Establisher : StackPtr;
if (IsWin64Prologue && HasFP) { if (IsWin64Prologue && HasFP) {
// Set RBP to a small fixed offset from RSP. In the funclet case, we base // Set RBP to a small fixed offset from RSP. In the funclet case, we base
// this calculation on the incoming establisher, which holds the value of // this calculation on the incoming establisher, which holds the value of
// RSP from the parent frame at the end of the prologue. // RSP from the parent frame at the end of the prologue.
unsigned SPOrEstablisher = IsFunclet ? Establisher : StackPtr;
SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
if (SEHFrameOffset) if (SEHFrameOffset)
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
@ -977,9 +976,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Win64 requires aligning the stack after the prologue. // Win64 requires aligning the stack after the prologue.
if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned."); assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
} }
// We already dealt with stack realignment and funclets above.
if (IsFunclet && STI.is32Bit())
return;
// If we need a base pointer, set it up here. It's whatever the value // If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects // of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer // will be allocated after this, so we can still use the base pointer
@ -988,7 +991,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Update the base pointer with the current stack pointer. // Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr) .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup); .setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) { if (X86FI->getRestoreBasePointer()) {
// Stash value of base pointer. Saving RSP instead of EBP shortens // Stash value of base pointer. Saving RSP instead of EBP shortens
@ -996,11 +999,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
FramePtr, true, X86FI->getRestoreBasePointerOffset()) FramePtr, true, X86FI->getRestoreBasePointerOffset())
.addReg(StackPtr) .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup); .setMIFlag(MachineInstr::FrameSetup);
} }
if (X86FI->getHasSEHFramePtrSave()) { if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
// Stash the value of the frame pointer relative to the base pointer for // Stash the value of the frame pointer relative to the base pointer for
// Win32 EH. This supports Win32 EH, which does the inverse of the above: // Win32 EH. This supports Win32 EH, which does the inverse of the above:
// it recovers the frame pointer from the base pointer rather than the // it recovers the frame pointer from the base pointer rather than the
@ -1359,21 +1362,42 @@ int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const uint64_t StackSize = MFI->getStackSize(); const uint64_t StackSize = MFI->getStackSize();
{ {
#ifndef NDEBUG #ifndef NDEBUG
// Note: LLVM arranges the stack as: // LLVM arranges the stack as follows:
// Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) // ...
// > "Stack Slots" (<--SP) // ARG2
// We can always address StackSlots from RSP. We can usually (unless // ARG1
// needsStackRealignment) address CSRs from RSP, but sometimes need to // RETADDR
// address them from RBP. FixedObjects can be placed anywhere in the stack // PUSH RBP <-- RBP points here
// frame depending on their specific requirements (i.e. we can actually // PUSH CSRs
// refer to arguments to the function which are stored in the *callers* // ~~~~~~~ <-- optional stack realignment dynamic adjustment
// frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs // ...
// AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. // STACK OBJECTS
// ... <-- RSP after prologue points here
//
// if (hasVarSizedObjects()):
// ... <-- "base pointer" (ESI/RBX) points here
// DYNAMIC ALLOCAS
// ... <-- RSP points here
//
// Case 1: In the simple case of no stack realignment and no dynamic
// allocas, both "fixed" stack objects (arguments and CSRs) are addressable
// with fixed offsets from RSP.
//
// Case 2: In the case of stack realignment with no dynamic allocas, fixed
// stack objects are addressed with RBP and regular stack objects with RSP.
//
// Case 3: In the case of dynamic allocas and stack realignment, RSP is used
// to address stack arguments for outgoing calls and nothing else. The "base
// pointer" points to local variables, and RBP points to fixed objects.
//
// In cases 2 and 3, we can only answer for non-fixed stack objects, and the
// answer we give is relative to the SP after the prologue, and not the
// SP in the middle of the function.
assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); assert((!TRI->needsStackRealignment(MF) || !MFI->isFixedObjectIndex(FI)) &&
"offset from fixed object to SP is not static");
// We don't handle tail calls, and shouldn't be seeing them // We don't handle tail calls, and shouldn't be seeing them either.
// either.
int TailCallReturnAddrDelta = int TailCallReturnAddrDelta =
MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");

View File

@ -135,7 +135,7 @@ private:
/// Aligns the stack pointer by ANDing it with -MaxAlign. /// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB, void BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL, MachineBasicBlock::iterator MBBI, DebugLoc DL,
uint64_t MaxAlign) const; unsigned Reg, uint64_t MaxAlign) const;
/// Make small positive stack adjustments using POPs. /// Make small positive stack adjustments using POPs.
bool adjustStackWithPops(MachineBasicBlock &MBB, bool adjustStackWithPops(MachineBasicBlock &MBB,

View File

@ -2889,11 +2889,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
DAG.getMachineFunction(), UnwindHelpFI), DAG.getMachineFunction(), UnwindHelpFI),
/*isVolatile=*/true, /*isVolatile=*/true,
/*isNonTemporal=*/false, /*Alignment=*/0); /*isNonTemporal=*/false, /*Alignment=*/0);
} else {
// Functions using Win32 EH are considered to have opaque SP adjustments
// to force local variables to be addressed from the frame or base
// pointers.
MFI->setHasOpaqueSPAdjustment(true);
} }
} }

View File

@ -0,0 +1,77 @@
; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
declare i32 @__CxxFrameHandler3(...)
declare void @Dtor(i64* %o)
declare void @f(i32)
define void @realigned_cleanup() personality i32 (...)* @__CxxFrameHandler3 {
entry:
; Overalign %o to cause stack realignment.
%o = alloca i64, align 32
invoke void @f(i32 1)
to label %invoke.cont unwind label %ehcleanup
invoke.cont: ; preds = %entry
call void @Dtor(i64* %o)
ret void
ehcleanup: ; preds = %entry
%0 = cleanuppad []
call void @Dtor(i64* %o)
cleanupret %0 unwind to caller
}
; X86-LABEL: _realigned_cleanup: # @realigned_cleanup
; X86: pushl %ebp
; X86: movl %esp, %ebp
; X86: pushl %ebx
; X86: pushl %edi
; X86: pushl %esi
; X86: andl $-32, %esp
; X86: subl $96, %esp
; X86: movl %esp, %esi
; EBP will reload from this offset.
; X86: movl %ebp, 28(%esi)
; The last EH reg field is the state number, so dtor adjust is this +4.
; X86: movl $-1, 72(%esi)
; X86-LABEL: "?dtor$2@?0?realigned_cleanup@4HA":
; X86: pushl %ebp
; X86: leal -76(%ebp), %esi
; X86: movl 28(%esi), %ebp
; We used to have a bug where we clobbered ESI after the prologue.
; X86-NOT: movl {{.*}}, %esi
; X86: popl %ebp
; X86: retl # CLEANUPRET
; X64-LABEL: realigned_cleanup: # @realigned_cleanup
; X64: pushq %rbp
; X64: .seh_pushreg 5
; X64: pushq %rbx
; X64: .seh_pushreg 3
; X64: subq $72, %rsp
; X64: .seh_stackalloc 72
; X64: leaq 64(%rsp), %rbp
; X64: .seh_setframe 5, 64
; X64: .seh_endprologue
; X64: andq $-32, %rsp
; X64: movq %rsp, %rbx
; RBP will reload from this offset.
; X64: movq %rbp, 48(%rbx)
; X64-LABEL: "?dtor$2@?0?realigned_cleanup@4HA":
; X64: movq %rdx, 16(%rsp)
; X64: pushq %rbp
; X64: .seh_pushreg 5
; X64: pushq %rbx
; X64: .seh_pushreg 3
; X64: subq $40, %rsp
; X64: .seh_stackalloc 40
; X64: leaq 64(%rdx), %rbp
; X64: .seh_endprologue
; X64: andq $-32, %rdx
; X64: movq %rdx, %rbx
; X64-NOT: mov{{.*}}, %rbx
; X64: popq %rbp
; X64: retq # CLEANUPRET