This patch corrects logic in PPCFrameLowering for save and restore of
nonvolatile condition register fields across calls under the SVR4 ABIs.
                                                                                                                   
 * With the 64-bit ABI, the save location is at a fixed offset of 8 from                                           
the stack pointer.  The frame pointer cannot be used to access this                                                
portion of the stack frame since the distance from the frame pointer may                                           
change with alloca calls.                                                                                          
                                                                                                                   
 * With the 32-bit ABI, the save location is just below the general
register save area, and is accessed via the frame pointer like the rest
of the save areas.  This is an optional slot, so it must only be created
if at least one of CR2, CR3, or CR4 was modified.
                                                                                                                   
 * For both ABIs, save/restore logic is generated only if at least one of
the nonvolatile CR fields was modified.

I also took this opportunity to clean up an extra FIXME in
PPCFrameLowering.h.  Save area offsets for 32-bit GPRs are meaningless
for the 64-bit ABI, so I removed them for correctness and efficiency.


Fixes PR13708 and partially also PR13623. It lets us enable exception handling
on PPC64.

Patch by William J. Schmidt!

llvm-svn: 163713
This commit is contained in:
Roman Divacky 2012-09-12 14:47:47 +00:00
parent 7d4c68a6ae
commit c9e23d93ae
5 changed files with 326 additions and 74 deletions

View File

@ -13,6 +13,7 @@
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@ -168,6 +169,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MI->eraseFromParent();
}
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
}
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
@ -184,13 +190,21 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
// FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
// FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
// still generate stackless code if all local vars are reg-allocated.
// Try: (FrameSize <= 224
// || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave.
Subtarget.isSVR4ABI()
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
MFI->setStackSize(0);
@ -488,7 +502,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
unsigned Reg = CSI[I].getReg();
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
@ -497,6 +510,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (PPC::CRBITRCRegClass.contains(Reg))
continue;
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (Subtarget.isSVR4ABI()
&& (PPC::CR2 <= Reg && Reg <= PPC::CR4)
&& !spillsCR(MF))
continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (Subtarget.isSVR4ABI()
&& Subtarget.isPPC64()
&& (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
MachineLocation CSDst(PPC::X1, 8);
MachineLocation CSSrc(PPC::CR2);
Moves.push_back(MachineMove(Label, CSDst, CSSrc));
continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
MachineLocation CSSrc(Reg);
Moves.push_back(MachineMove(Label, CSDst, CSSrc));
@ -714,11 +746,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
}
/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
@ -808,7 +835,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
bool HasGPSaveArea = false;
bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
bool HasCRSaveArea = false;
bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
@ -843,10 +869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinFPR) {
MinFPR = Reg;
}
// FIXME SVR4: Disable CR save area for now.
} else if (PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)) {
// HasCRSaveArea = true;
; // do nothing, as we already know whether CRs are spilled
} else if (PPC::VRSAVERCRegClass.contains(Reg)) {
HasVRSAVESaveArea = true;
} else if (PPC::VRRCRegClass.contains(Reg)) {
@ -926,16 +951,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
}
// The CR save area is below the general register save area.
if (HasCRSaveArea) {
// FIXME SVR4: Is it actually possible to have multiple elements in CSI
// which have the CR/CRBIT register class?
// For 32-bit only, the CR save area is below the general register
// save area. For 64-bit SVR4, the CR save area is addressed relative
// to the stack pointer and hence does not need an adjustment here.
// Only CR2 (the first nonvolatile spilled) has an associated frame
// index so that we have a single uniform save area.
if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
// Adjust the frame index of the CR spill slot.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
if (PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)) {
if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
// Leave Darwin logic as-is.
|| (!Subtarget.isSVR4ABI() &&
(PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)))) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@ -973,3 +1003,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
}
}
/// spillCalleeSavedRegisters - Emit the callee-saved register spills before
/// \p MI in \p MBB for the SVR4 ABIs.
///
/// Ordinary registers are stored through TII.storeRegToStackSlot.  The
/// nonvolatile CR fields (CR2..CR4) are handled as a group: the first one
/// encountered triggers a single mfcr/stw pair through GPR12 that saves the
/// whole CR, and any further CR field in \p CSI is skipped.
///
/// \returns false (emitting nothing) when the subtarget is not SVR4, so the
/// caller keeps its pre-existing behavior; true otherwise.
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     const std::vector<CalleeSavedInfo> &CSI,
                                     const TargetRegisterInfo *TRI) const {
  // Only the SVR4 32- and 64-bit ABIs are handled here.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII =
    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
  DebugLoc DL;
  const bool Is64Bit = Subtarget.isPPC64();

  // Becomes true once the whole-CR save sequence has been emitted.
  bool SavedWholeCR = false;

  typedef std::vector<CalleeSavedInfo>::const_iterator CSIIter;
  for (CSIIter Info = CSI.begin(), End = CSI.end(); Info != End; ++Info) {
    const unsigned Reg = Info->getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    const bool IsNonvolatileCR = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    if (!IsNonvolatileCR) {
      // Plain callee-saved register: live-in to the block, killed by the
      // store to its own stack slot.
      MBB.addLiveIn(Reg);
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(MBB, MI, Reg, true,
                              Info->getFrameIdx(), RC, TRI);
      continue;
    }

    // One store covers every CR field; nothing more to do for later ones.
    if (SavedWholeCR)
      continue;
    SavedWholeCR = true;

    MBB.addLiveIn(Reg);

    // Copy the CR into GPR12 and store that word to the CR save slot.
    if (Is64Bit) {
      // 64-bit: the slot is at SP+8.
      MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
      MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
                       .addReg(PPC::X12, getKillRegState(true))
                       .addImm(8)
                       .addReg(PPC::X1));
    } else {
      // 32-bit: the slot is addressed through this entry's frame index.
      MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
      MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                       Info->getFrameIdx()));
    }

    // Record in the function info that this function spills the CR.
    MF->getInfo<PPCFunctionInfo>()->setSpillsCR();
  }

  return true;
}
/// restoreCRs - Insert, before \p MI, a reload of the saved CR word into
/// GPR12 followed by one mtcrf per CR field whose *Spilled flag is set.
///
/// \param isPPC64   selects the SP+8 slot with X12/MTCRF8 (true) or the
///                  frame-index slot with R12/MTCRF (false).
/// \param CSIIndex  index into \p CSI of the entry whose frame index names
///                  the CR save slot; only read on the 32-bit path.
static void
restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
           MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
           const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII =
    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
  DebugLoc DL;

  const unsigned ScratchReg = isPPC64 ? PPC::X12 : PPC::R12;
  const unsigned MoveToCROp = isPPC64 ? PPC::MTCRF8 : PPC::MTCRF;

  // Step 1: reload the saved CR image into the scratch GPR.
  if (isPPC64)
    // 64-bit: SP+8
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
                     .addImm(8)
                     .addReg(PPC::X1));
  else
    // 32-bit: addressed through the recorded frame index
    MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
                                             PPC::R12),
                                     CSI[CSIIndex].getFrameIdx()));

  // Step 2: move the image back into each requested field, CR2 first.
  const bool FieldSpilled[3] = { CR2Spilled, CR3Spilled, CR4Spilled };
  const unsigned FieldReg[3] = { PPC::CR2, PPC::CR3, PPC::CR4 };
  for (unsigned Idx = 0; Idx != 3; ++Idx) {
    if (!FieldSpilled[Idx])
      continue;
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(MoveToCROp), FieldReg[Idx])
                     .addReg(ScratchReg));
  }
}
/// restoreCalleeSavedRegisters - Emit the callee-saved register reloads
/// before \p MI in \p MBB for the SVR4 ABIs.
///
/// Ordinary registers are reloaded through TII.loadRegFromStackSlot.  The
/// nonvolatile CR fields (CR2..CR4) are only noted while walking \p CSI and
/// are then restored together by restoreCRs, either just before the first
/// non-CR register that follows them or after the loop.
///
/// After every emitted restore the insertion point is moved back to the
/// position just after the instruction that originally preceded \p MI (or to
/// the start of the block), so each new restore lands in front of the ones
/// already emitted, i.e. in reverse order of the spills.
///
/// \returns false (emitting nothing) when the subtarget is not SVR4, so the
/// caller keeps its pre-existing behavior; true otherwise.
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII =
    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());

  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();
  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    if (Reg == PPC::CR2 || Reg == PPC::CR3 || Reg == PPC::CR4) {
      // Remember the CSI entry of the first nonvolatile CR field in the
      // current group; its frame index names the CR save slot.  This used
      // to be recorded for CR2 only, which left CSIIndex at 0 (an unrelated
      // entry) whenever just CR3 and/or CR4 had been spilled.  The spill
      // side likewise uses the first CR field it encounters.
      if (!(CR2Spilled || CR3Spilled || CR4Spilled))
        CSIIndex = i;

      if (Reg == PPC::CR2)
        CR2Spilled = true;
      else if (Reg == PPC::CR3)
        CR3Spilled = true;
      else
        CR4Spilled = true;

      // Nothing was inserted, so the insertion point needs no adjustment.
      continue;
    }

    // When we first encounter a non-CR register after seeing at
    // least one CR register, restore all spilled CRs together.
    if ((CR2Spilled || CR3Spilled || CR4Spilled)
        && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
      restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
                 MBB, I, CSI, CSIIndex);
      CR2Spilled = CR3Spilled = CR4Spilled = false;
    }

    // Default behavior for non-CR saves.
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
                             RC, TRI);
    assert(I != MBB.begin() &&
           "loadRegFromStackSlot didn't insert any code!");

    // Insert in reverse order: step back to the first instruction emitted
    // so far, so the next restore goes in front of it.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled)
    restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
               MBB, I, CSI, CSIIndex);

  return true;
}

View File

@ -45,6 +45,16 @@ public:
RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
/// time).
@ -170,23 +180,11 @@ public:
{PPC::R15, -68},
{PPC::R14, -72},
// CR save area offset.
// FIXME SVR4: Disable CR save area for now.
// {PPC::CR2, -4},
// {PPC::CR3, -4},
// {PPC::CR4, -4},
// {PPC::CR2LT, -4},
// {PPC::CR2GT, -4},
// {PPC::CR2EQ, -4},
// {PPC::CR2UN, -4},
// {PPC::CR3LT, -4},
// {PPC::CR3GT, -4},
// {PPC::CR3EQ, -4},
// {PPC::CR3UN, -4},
// {PPC::CR4LT, -4},
// {PPC::CR4GT, -4},
// {PPC::CR4EQ, -4},
// {PPC::CR4UN, -4},
// CR save area offset. We map each of the nonvolatile CR fields
// to the slot for CR2, which is the first of the nonvolatile CR
// fields to be assigned, so that we only allocate one save slot.
// See PPCRegisterInfo::hasReservedSpillSlot() for more information.
{PPC::CR2, -4},
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
@ -228,27 +226,6 @@ public:
{PPC::F14, -144},
// General register save area offsets.
// FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
// mode?
{PPC::R31, -4},
{PPC::R30, -12},
{PPC::R29, -20},
{PPC::R28, -28},
{PPC::R27, -36},
{PPC::R26, -44},
{PPC::R25, -52},
{PPC::R24, -60},
{PPC::R23, -68},
{PPC::R22, -76},
{PPC::R21, -84},
{PPC::R20, -92},
{PPC::R19, -100},
{PPC::R18, -108},
{PPC::R17, -116},
{PPC::R16, -124},
{PPC::R15, -132},
{PPC::R14, -140},
{PPC::X31, -8},
{PPC::X30, -16},
{PPC::X29, -24},
@ -268,24 +245,6 @@ public:
{PPC::X15, -136},
{PPC::X14, -144},
// CR save area offset.
// FIXME SVR4: Disable CR save area for now.
// {PPC::CR2, -4},
// {PPC::CR3, -4},
// {PPC::CR4, -4},
// {PPC::CR2LT, -4},
// {PPC::CR2GT, -4},
// {PPC::CR2EQ, -4},
// {PPC::CR2UN, -4},
// {PPC::CR3LT, -4},
// {PPC::CR3GT, -4},
// {PPC::CR3EQ, -4},
// {PPC::CR3UN, -4},
// {PPC::CR4LT, -4},
// {PPC::CR4GT, -4},
// {PPC::CR4EQ, -4},
// {PPC::CR4UN, -4},
// VRSAVE save area offset.
{PPC::VRSAVE, -4},

View File

@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
Subtarget(ST), TII(tii) {
Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@ -111,6 +111,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
// For 32-bit SVR4, also initialize the frame index associated with
// the CR spill slot.
if (!Subtarget.isPPC64())
CRSpillFrameIdx = 0;
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@ -477,6 +482,31 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we must
// create exactly one stack slot and return its FrameIdx for all
// nonvolatiles.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
if (Subtarget.isPPC64()) {
FrameIdx = 0;
} else if (CRSpillFrameIdx) {
FrameIdx = CRSpillFrameIdx;
} else {
MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
CRSpillFrameIdx = FrameIdx;
}
return true;
}
return false;
}
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {

View File

@ -30,6 +30,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
std::map<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@ -65,6 +66,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;

View File

@ -0,0 +1,49 @@
; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
declare void @foo()
; test_cr2: the inline asm below lists only cr2 as clobbered (~{cr2}) and is
; followed by a call to @foo. The result of the asm is kept in a stack slot
; across the call and returned.
define i32 @test_cr2() nounwind {
entry:
%ret = alloca i32, align 4
; The asm writes the CR (mtcr), compares into CR field 2, and reads the CR back.
%0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
store i32 %0, i32* %ret, align 4
call void @foo()
%1 = load i32* %ret, align 4
ret i32 %1
}
; PPC32: mfcr 12
; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
; PPC32: lwz 12, {{[0-9]+}}(31)
; PPC32-NEXT: mtcrf 32, 12
; PPC64: mfcr 12
; PPC64-NEXT: stw 12, 8(1)
; PPC64: lwz 12, 8(1)
; PPC64-NEXT: mtcrf 32, 12
; test_cr234: same shape as a single-field test, but the inline asm lists all
; three of cr2, cr3 and cr4 as clobbered and is followed by a call to @foo.
define i32 @test_cr234() nounwind {
entry:
%ret = alloca i32, align 4
; The asm writes the CR (mtcr), compares into CR fields 2, 3 and 4, and reads
; the CR back.
%0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09cmp 3,$2,$2\0A\09cmp 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind
store i32 %0, i32* %ret, align 4
call void @foo()
%1 = load i32* %ret, align 4
ret i32 %1
}
; PPC32: mfcr 12
; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
; PPC32: lwz 12, {{[0-9]+}}(31)
; PPC32-NEXT: mtcrf 32, 12
; PPC32-NEXT: mtcrf 16, 12
; PPC32-NEXT: mtcrf 8, 12
; PPC64: mfcr 12
; PPC64-NEXT: stw 12, 8(1)
; PPC64: lwz 12, 8(1)
; PPC64-NEXT: mtcrf 32, 12
; PPC64-NEXT: mtcrf 16, 12
; PPC64-NEXT: mtcrf 8, 12