Add a local stack object block allocation pass. This is still an

experimental pass that allocates locals relative to one another before
register allocation and then assigns them to actual stack slots as a block
later in PEI. This will eventually allow targets with limited index offset
range to allocate additional base registers (not just FP and SP) to
more efficiently reference locals, as well as handle situations where
locals cannot be referenced via SP or FP at all (dynamic stack realignment
together with variable sized objects, for example). It's currently
incomplete and almost certainly buggy. Work in progress.

Disabled by default and gated via the -enable-local-stack-alloc command
line option.

rdar://8277890

llvm-svn: 111059
This commit is contained in:
Jim Grosbach 2010-08-14 00:15:52 +00:00
parent 21e6dc6aa3
commit a030fa5297
6 changed files with 282 additions and 2 deletions

View File

@ -15,6 +15,7 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H #define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
//#include "llvm/ADT/IndexedMap.h"
#include "llvm/System/DataTypes.h" #include "llvm/System/DataTypes.h"
#include <cassert> #include <cassert>
#include <vector> #include <vector>
@ -103,10 +104,14 @@ class MachineFrameInfo {
// protector. // protector.
bool MayNeedSP; bool MayNeedSP;
// PreAllocated - If true, the object was mapped into the local frame
// block and doesn't need additional handling for allocation beyond that.
bool PreAllocated;
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
bool isSS, bool NSP) bool isSS, bool NSP)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS), MayNeedSP(NSP) {} isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
}; };
/// Objects - The list of stack objects allocated... /// Objects - The list of stack objects allocated...
@ -195,8 +200,20 @@ class MachineFrameInfo {
/// ///
const TargetFrameInfo &TFI; const TargetFrameInfo &TFI;
/// LocalFrameObjects - References to frame indices which are mapped
/// into the local frame allocation block. <FrameIdx, LocalOffset>
SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects;
/// LocalFrameSize - Size of the pre-allocated local frame block.
int64_t LocalFrameSize;
/// LocalFrameBaseOffset - The base offset from the stack pointer at
/// function entry of the local frame blob. Set by PEI for use by
/// target in eliminateFrameIndex().
int64_t LocalFrameBaseOffset;
public: public:
explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) { explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
HasVarSizedObjects = false; HasVarSizedObjects = false;
FrameAddressTaken = false; FrameAddressTaken = false;
@ -206,6 +223,8 @@ public:
StackProtectorIdx = -1; StackProtectorIdx = -1;
MaxCallFrameSize = 0; MaxCallFrameSize = 0;
CSIValid = false; CSIValid = false;
LocalFrameSize = 0;
LocalFrameBaseOffset = 0;
} }
/// hasStackObjects - Return true if there are any stack objects in this /// hasStackObjects - Return true if there are any stack objects in this
@ -252,6 +271,42 @@ public:
/// ///
unsigned getNumObjects() const { return Objects.size(); } unsigned getNumObjects() const { return Objects.size(); }
/// mapLocalFrameObject - Map a frame index into the local object block.
/// Appends the <ObjectIndex, Offset> pair to the local frame object list and
/// marks the object as pre-allocated so that isObjectPreAllocated() reports
/// true for it. Offset is the object's position relative to the start of the
/// local block.
void mapLocalFrameObject(int ObjectIndex, int64_t Offset) {
  // Validate the index the same way the other per-object accessors do before
  // touching Objects[].
  assert(unsigned(ObjectIndex+NumFixedObjects) < Objects.size() &&
         "Invalid Object Idx!");
  LocalFrameObjects.push_back(std::pair<int, int64_t>(ObjectIndex, Offset));
  Objects[ObjectIndex + NumFixedObjects].PreAllocated = true;
}
/// getLocalFrameObjectMap - Get the local offset mapping for a for an object
std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
"Invalid local object reference!");
return LocalFrameObjects[i];
}
/// getLocalFrameObjectCount - Return the number of objects allocated into
/// the local object block.
int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
/// setLocalFrameBaseOffset - Record where the local frame blob begins,
/// expressed as an offset from the stack pointer.
void setLocalFrameBaseOffset(int64_t o) {
  LocalFrameBaseOffset = o;
}
/// getLocalFrameBaseOffset - Return the stack pointer offset at which the
/// local frame blob begins, as last stored by setLocalFrameBaseOffset()
/// (zero if it was never set).
int64_t getLocalFrameBaseOffset() const {
  return LocalFrameBaseOffset;
}
/// getLocalFrameSize - Get the size of the local object blob.
int64_t getLocalFrameSize() const { return LocalFrameSize; }
/// isObjectPreAllocated - Report whether the given frame object has been
/// placed in the local block (its PreAllocated flag is set).
bool isObjectPreAllocated(int ObjectIdx) const {
  // Frame indices are biased by the number of fixed objects when indexing
  // the Objects list.
  const unsigned Slot = unsigned(ObjectIdx+NumFixedObjects);
  assert(Slot < Objects.size() &&
         "Invalid Object Idx!");
  return Objects[Slot].PreAllocated;
}
/// getObjectSize - Return the size of the specified object. /// getObjectSize - Return the size of the specified object.
/// ///
int64_t getObjectSize(int ObjectIdx) const { int64_t getObjectSize(int ObjectIdx) const {

View File

@ -198,6 +198,13 @@ namespace llvm {
/// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
FunctionPass *createSjLjEHPass(const TargetLowering *tli); FunctionPass *createSjLjEHPass(const TargetLowering *tli);
/// createLocalStackSlotAllocationPass - Create the local stack slot
/// allocation pass. The pass assigns local frame indices to stack slots
/// relative to one another; the final stack slot locations are assigned
/// later, in PEI. The intent is for it to also allocate base registers to
/// access locals that the target estimates to be out of range of normal
/// frame pointer or stack pointer index addressing.
/// NOTE(review): the base register allocation described above is the stated
/// goal of the pass; confirm against the implementation before relying on it.
FunctionPass *createLocalStackSlotAllocationPass();
} // End llvm namespace } // End llvm namespace
#endif #endif

View File

@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
LiveIntervalAnalysis.cpp LiveIntervalAnalysis.cpp
LiveStackAnalysis.cpp LiveStackAnalysis.cpp
LiveVariables.cpp LiveVariables.cpp
LocalStackAllocation.cpp
LowerSubregs.cpp LowerSubregs.cpp
MachineBasicBlock.cpp MachineBasicBlock.cpp
MachineCSE.cpp MachineCSE.cpp

View File

@ -74,6 +74,16 @@ static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"), cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
// Enable or disable local stack object block allocation. This is an
// experimental pass that allocates locals relative to one another before
// register allocation and then assigns them to actual stack slots as a block
// later in PEI. This will eventually allow targets with limited index offset
// range to allocate additional base registers (not just FP and SP) to
// more efficiently reference locals, as well as handle situations where
// locals cannot be referenced via SP or FP at all (dynamic stack realignment
// together with variable sized objects, for example).
// Defaults to off. Deliberately not 'static': the prolog/epilog inserter
// refers to this option through an 'extern' declaration.
cl::opt<bool> EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(false),
cl::Hidden, cl::desc("Enable pre-regalloc stack frame index allocation"));
static cl::opt<cl::boolOrDefault> static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@ -344,6 +354,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass()); PM.add(createOptimizePHIsPass());
// Assign local variables to stack slots relative to one another and simplify
// frame index references where possible. Final stack slot locations will be
// assigned in PEI.
if (EnableLocalStackAlloc)
PM.add(createLocalStackSlotAllocationPass());
if (OptLevel != CodeGenOpt::None) { if (OptLevel != CodeGenOpt::None) {
// With optimization, dead code should already be eliminated. However // With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only // there is one known exception: lowered code for arguments that are only

View File

@ -0,0 +1,169 @@
//===- LocalStackAllocation.cpp - Pre-allocate locals to stack slots ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass assigns local frame indices to stack slots relative to one another
// and allocates additional base registers to access them when the target
// estimates they are likely to be out of range of stack pointer and frame
// pointer relative addressing.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "localstackalloc"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
using namespace llvm;
STATISTIC(NumAllocations, "Number of frame indices processed");
namespace {
  /// LocalStackSlotPass - Machine function pass that lays out a function's
  /// frame objects relative to one another and records the resulting
  /// offsets in the MachineFrameInfo local frame object map.
  class LocalStackSlotPass: public MachineFunctionPass {
    /// Size in bytes of the block laid out by the most recent call to
    /// calculateFrameObjectOffsets().
    int64_t LocalStackSize;

    /// Assign a block-relative offset to every live frame object.
    void calculateFrameObjectOffsets(MachineFunction &Fn);
  public:
    static char ID; // Pass identification, replacement for typeid

    // Start with a well-defined size so the member is never read
    // uninitialized, regardless of the order in which members are used.
    explicit LocalStackSlotPass()
      : MachineFunctionPass(ID), LocalStackSize(0) { }

    bool runOnMachineFunction(MachineFunction &MF);

    /// The pass only records frame layout information; it declares that it
    /// preserves the CFG.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Local Stack Slot Allocation";
    }
  };
} // end anonymous namespace
// Storage for the pass identifier.
char LocalStackSlotPass::ID = 0;

/// Factory for the local stack slot allocation pass; returns a newly
/// allocated pass object.
FunctionPass *llvm::createLocalStackSlotAllocationPass() {
  FunctionPass *NewPass = new LocalStackSlotPass();
  return NewPass;
}
/// runOnMachineFunction - Lay out the function's frame objects in the local
/// block and report the resulting block size in the debug log. Always
/// reports the function as modified.
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
  // Compute block-relative offsets; this also updates LocalStackSize.
  this->calculateFrameObjectOffsets(MF);

  DEBUG(dbgs() << LocalStackSize << " bytes of local storage pre-allocated\n");

  return true;
}
/// AdjustStackOffset - Place one frame object at the next suitably aligned
/// position in the local block.
///
/// Offset is the running end of the block: it is rounded up to the object's
/// alignment, recorded as the object's local offset, and then advanced past
/// the object. MaxAlign is raised to the object's alignment when that is
/// larger.
static inline void
AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
                  unsigned &MaxAlign) {
  const unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);

  // Keep track of the strictest alignment seen so far.
  if (MaxAlign < ObjAlign)
    MaxAlign = ObjAlign;

  // Round the running offset up to a multiple of the object's alignment.
  const int64_t AlignedOffset = (Offset + ObjAlign - 1) / ObjAlign * ObjAlign;
  Offset = AlignedOffset;

  DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
        << Offset << "\n");

  // Record the placement, then step past the object.
  MFI->mapLocalFrameObject(FrameIdx, AlignedOffset);
  Offset = AlignedOffset + MFI->getObjectSize(FrameIdx);

  ++NumAllocations;
}
/// calculateFrameObjectOffsets - Assign every live frame object an offset
/// relative to the start of the local block and remember the total size of
/// the block in LocalStackSize.
///
/// Layout order: the stack protector slot (if any), then objects that may
/// need stack protection, then all remaining objects.
///
/// NOTE(review): the computed size is only stored in this pass object; this
/// function does not hand it to MachineFrameInfo.
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Running end of the block and the strictest alignment seen so far.
  int64_t Offset = 0;
  unsigned MaxAlign = MFI->getMaxAlignment();

  const int ProtectorIdx = MFI->getStackProtectorIndex();
  const int NumObjects = MFI->getObjectIndexEnd();

  // The stack protector goes first, followed immediately by the objects
  // that may need protecting, so that they sit ahead of the other locals.
  SmallSet<int, 16> LargeStackObjs;
  if (ProtectorIdx >= 0) {
    AdjustStackOffset(MFI, ProtectorIdx, Offset, MaxAlign);

    for (int FI = 0; FI != NumObjects; ++FI) {
      if (MFI->isDeadObjectIndex(FI) || FI == ProtectorIdx ||
          !MFI->MayNeedStackProtector(FI))
        continue;

      AdjustStackOffset(MFI, FI, Offset, MaxAlign);
      LargeStackObjs.insert(FI);
    }
  }

  // Everything else that is still live and has not been placed above.
  for (int FI = 0; FI != NumObjects; ++FI) {
    if (MFI->isDeadObjectIndex(FI) || FI == ProtectorIdx ||
        LargeStackObjs.count(FI))
      continue;

    AdjustStackOffset(MFI, FI, Offset, MaxAlign);
  }

  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  if (!RegInfo->targetHandlesStackFrameRounding()) {
    // Argument space reserved on entry for call sites counts towards the
    // overall size.
    if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
      Offset += MFI->getMaxCallFrameSize();

    // Functions with calls, allocas, or a realigned non-empty frame round
    // to the target's stack alignment so the callee frame / alloca data is
    // suitably aligned; other (leaf) functions only need the transient
    // stack alignment.
    const bool NeedsFullAlign =
      MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
      (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0);
    unsigned StackAlign = NeedsFullAlign ? TFI.getStackAlignment()
                                         : TFI.getTransientStackAlignment();

    // With the frame pointer eliminated all offsets are SP-relative, so the
    // size must also honour the strictest object alignment.
    StackAlign = std::max(StackAlign, MaxAlign);

    // Round the size up to a multiple of StackAlign.
    unsigned AlignMask = StackAlign - 1;
    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
  }

  // Remember how big this blob of stack space is.
  LocalStackSize = Offset;
}

View File

@ -19,6 +19,7 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h" #include "PrologEpilogInserter.h"
#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h"
@ -32,6 +33,7 @@
#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h" #include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
@ -39,6 +41,10 @@
using namespace llvm; using namespace llvm;
// FIXME: For testing purposes only. Remove once the pre-allocation pass
// is done.
extern cl::opt<bool> EnableLocalStackAlloc;
char PEI::ID = 0; char PEI::ID = 0;
INITIALIZE_PASS(PEI, "prologepilog", INITIALIZE_PASS(PEI, "prologepilog",
@ -462,8 +468,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
Offset = (Offset + Align - 1) / Align * Align; Offset = (Offset + Align - 1) / Align * Align;
if (StackGrowsDown) { if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else { } else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
MFI->setObjectOffset(FrameIdx, Offset); MFI->setObjectOffset(FrameIdx, Offset);
Offset += MFI->getObjectSize(FrameIdx); Offset += MFI->getObjectSize(FrameIdx);
} }
@ -548,6 +556,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
} }
// Store the offset of the start of the local allocation block. This
// will be used later when resolving frame base virtual register pseudos.
MFI->setLocalFrameBaseOffset(Offset);
if (EnableLocalStackAlloc) {
// Allocate the local block
Offset += MFI->getLocalFrameSize();
// Resolve offsets for objects in the local block.
for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
int64_t FIOffset = MFI->getLocalFrameBaseOffset() + Entry.second;
AdjustStackOffset(MFI, Entry.first, StackGrowsDown, FIOffset, MaxAlign);
}
}
// FIXME: Allocate locals. Once the block allocation pass is turned on,
// this simplifies to just the second loop, since all of the large objects
// will have already been handled. The second loop can also simplify a
// bit, as the conditionals inside aren't all necessary.
// Make sure that the stack protector comes before the local variables on the // Make sure that the stack protector comes before the local variables on the
// stack. // stack.
SmallSet<int, 16> LargeStackObjs; SmallSet<int, 16> LargeStackObjs;
@ -557,6 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Assign large stack objects first. // Assign large stack objects first.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
if (MFI->isObjectPreAllocated(i))
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue; continue;
if (RS && (int)i == RS->getScavengingFrameIndex()) if (RS && (int)i == RS->getScavengingFrameIndex())
@ -576,6 +606,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then assign frame offsets to stack objects that are not used to spill // Then assign frame offsets to stack objects that are not used to spill
// callee saved registers. // callee saved registers.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
if (MFI->isObjectPreAllocated(i))
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue; continue;
if (RS && (int)i == RS->getScavengingFrameIndex()) if (RS && (int)i == RS->getScavengingFrameIndex())