Roll r123609 back in with two changes that fix test failures with expensive
checks enabled:

1) Use '<' to compare integers in a comparison function rather than '<='.

2) Use the uniqued set DefBlocks rather than Info.DefiningBlocks to initialize
the priority queue.

The speedup of scalarrepl on test-suite + SPEC2000 + SPEC2006 is a bit less, at
just under 16% rather than 17%.

llvm-svn: 123662
This commit is contained in:
Cameron Zwarich 2011-01-17 17:38:41 +00:00
parent ea49cb04a5
commit b410858a5f
4 changed files with 125 additions and 65 deletions

View File

@ -38,8 +38,7 @@ bool isAllocaPromotable(const AllocaInst *AI);
/// made to the IR. /// made to the IR.
/// ///
void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, DominanceFrontier &DF, DominatorTree &DT, AliasSetTracker *AST = 0);
AliasSetTracker *AST = 0);
} // End llvm namespace } // End llvm namespace

View File

@ -152,7 +152,6 @@ namespace {
// will not alter the CFG, so say so. // will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const { virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>(); AU.addRequired<DominatorTree>();
AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG(); AU.setPreservesCFG();
} }
}; };
@ -180,7 +179,6 @@ char SROA_SSAUp::ID = 0;
INITIALIZE_PASS_BEGIN(SROA_DF, "scalarrepl", INITIALIZE_PASS_BEGIN(SROA_DF, "scalarrepl",
"Scalar Replacement of Aggregates (DF)", false, false) "Scalar Replacement of Aggregates (DF)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
INITIALIZE_PASS_END(SROA_DF, "scalarrepl", INITIALIZE_PASS_END(SROA_DF, "scalarrepl",
"Scalar Replacement of Aggregates (DF)", false, false) "Scalar Replacement of Aggregates (DF)", false, false)
@ -877,11 +875,8 @@ public:
bool SROA::performPromotion(Function &F) { bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas; std::vector<AllocaInst*> Allocas;
DominatorTree *DT = 0; DominatorTree *DT = 0;
DominanceFrontier *DF = 0; if (HasDomFrontiers)
if (HasDomFrontiers) {
DT = &getAnalysis<DominatorTree>(); DT = &getAnalysis<DominatorTree>();
DF = &getAnalysis<DominanceFrontier>();
}
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
@ -900,7 +895,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break; if (Allocas.empty()) break;
if (HasDomFrontiers) if (HasDomFrontiers)
PromoteMemToReg(Allocas, *DT, *DF); PromoteMemToReg(Allocas, *DT);
else { else {
SSAUpdater SSA; SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {

View File

@ -40,7 +40,6 @@ namespace {
// //
virtual void getAnalysisUsage(AnalysisUsage &AU) const { virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>(); AU.addRequired<DominatorTree>();
AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG(); AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms // This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>(); AU.addPreserved<UnifyFunctionExitNodes>();
@ -54,7 +53,6 @@ char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false) false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false) false, false)
@ -66,7 +64,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false; bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>(); DominatorTree &DT = getAnalysis<DominatorTree>();
DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
while (1) { while (1) {
Allocas.clear(); Allocas.clear();
@ -80,7 +77,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break; if (Allocas.empty()) break;
PromoteMemToReg(Allocas, DT, DF); PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size(); NumPromoted += Allocas.size();
Changed = true; Changed = true;
} }

View File

@ -9,10 +9,19 @@
// //
// This file promotes memory references to be register references. It promotes // This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is // alloca instructions which only have loads and stores as uses. An alloca is
// transformed by using dominator frontiers to place PHI nodes, then traversing // transformed by using iterated dominator frontiers to place PHI nodes, then
// the function in depth-first order to rewrite loads and stores as appropriate. // traversing the function in depth-first order to rewrite loads and stores as
// This is just the standard SSA construction algorithm to construct "pruned" // appropriate.
// SSA form. //
// The algorithm used here is based on:
//
// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
// Programming Languages
// POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and to
// directly compute pruned SSA using per-variable liveness information.
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -35,6 +44,7 @@
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h" #include "llvm/Support/CFG.h"
#include <algorithm> #include <algorithm>
#include <queue>
using namespace llvm; using namespace llvm;
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
@ -179,7 +189,6 @@ namespace {
/// ///
std::vector<AllocaInst*> Allocas; std::vector<AllocaInst*> Allocas;
DominatorTree &DT; DominatorTree &DT;
DominanceFrontier &DF;
DIFactory *DIF; DIFactory *DIF;
/// AST - An AliasSetTracker object to update. If null, don't update it. /// AST - An AliasSetTracker object to update. If null, don't update it.
@ -217,12 +226,15 @@ namespace {
/// non-deterministic behavior. /// non-deterministic behavior.
DenseMap<BasicBlock*, unsigned> BBNumbers; DenseMap<BasicBlock*, unsigned> BBNumbers;
/// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
DenseMap<DomTreeNode*, unsigned> DomLevels;
/// BBNumPreds - Lazily compute the number of predecessors a block has. /// BBNumPreds - Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock*, unsigned> BBNumPreds; DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public: public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
DominanceFrontier &df, AliasSetTracker *ast) AliasSetTracker *ast)
: Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} : Allocas(A), DT(dt), DIF(0), AST(ast) {}
~PromoteMem2Reg() { ~PromoteMem2Reg() {
delete DIF; delete DIF;
} }
@ -325,11 +337,19 @@ namespace {
DbgDeclare = FindAllocaDbgDeclare(AI); DbgDeclare = FindAllocaDbgDeclare(AI);
} }
}; };
/// DomTreeNodePair - A dominator tree node paired with its level in the
/// dominator tree.
typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
/// DomTreeNodeCompare - Orders DomTreeNodePairs by level only. It is used as
/// the comparator of a std::priority_queue, so the pair with the greatest
/// level is the one returned by top().
struct DomTreeNodeCompare {
  // The comparison must be a strict '<' (not '<=') so that it is a valid
  // strict weak ordering. The call operator is const-qualified so that it can
  // be invoked through a const comparator object.
  bool operator()(const DomTreeNodePair &LHS,
                  const DomTreeNodePair &RHS) const {
    return LHS.second < RHS.second;
  }
};
} // end of anonymous namespace } // end of anonymous namespace
void PromoteMem2Reg::run() { void PromoteMem2Reg::run() {
Function &F = *DF.getRoot()->getParent(); Function &F = *DT.getRoot()->getParent();
if (AST) PointerAllocaValues.resize(Allocas.size()); if (AST) PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size());
@ -422,7 +442,26 @@ void PromoteMem2Reg::run() {
continue; continue;
} }
} }
// If we haven't computed dominator tree levels, do so now.
if (DomLevels.empty()) {
SmallVector<DomTreeNode*, 32> Worklist;
DomTreeNode *Root = DT.getRootNode();
DomLevels[Root] = 0;
Worklist.push_back(Root);
while (!Worklist.empty()) {
DomTreeNode *Node = Worklist.pop_back_val();
unsigned ChildLevel = DomLevels[Node] + 1;
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
CI != CE; ++CI) {
DomLevels[*CI] = ChildLevel;
Worklist.push_back(*CI);
}
}
}
// If we haven't computed a numbering for the BB's in the function, do so // If we haven't computed a numbering for the BB's in the function, do so
// now. // now.
if (BBNumbers.empty()) { if (BBNumbers.empty()) {
@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
/// avoiding insertion of dead phi nodes. /// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) { AllocaInfo &Info) {
// Unique the set of defining blocks for efficient lookup. // Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock*, 32> DefBlocks; SmallPtrSet<BasicBlock*, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
@ -673,46 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
SmallPtrSet<BasicBlock*, 32> LiveInBlocks; SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
// Compute the locations where PhiNodes need to be inserted. Look at the // Use a priority queue keyed on dominator tree level so that inserted nodes
// dominance frontier of EACH basic-block we have a write in. // are handled from the bottom of the dominator tree upwards.
unsigned CurrentVersion = 0; typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; DomTreeNodeCompare> IDFPriorityQueue;
while (!Info.DefiningBlocks.empty()) { IDFPriorityQueue PQ;
BasicBlock *BB = Info.DefiningBlocks.back();
Info.DefiningBlocks.pop_back(); for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
E = DefBlocks.end(); I != E; ++I) {
// Look up the DF for this write, add it to defining blocks. if (DomTreeNode *Node = DT.getNode(*I))
DominanceFrontier::const_iterator it = DF.find(BB); PQ.push(std::make_pair(Node, DomLevels[Node]));
if (it == DF.end()) continue;
const DominanceFrontier::DomSetType &S = it->second;
// In theory we don't need the indirection through the DFBlocks vector.
// In practice, the order of calling QueuePhiNode would depend on the
// (unspecified) ordering of basic blocks in the dominance frontier,
// which would give PHI nodes non-deterministic subscripts. Fix this by
// processing blocks in order of the occurrence in the function.
for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
PE = S.end(); P != PE; ++P) {
// If the frontier block is not in the live-in set for the alloca, don't
// bother processing it.
if (!LiveInBlocks.count(*P))
continue;
DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
}
// Sort by the block ordering in the function.
if (DFBlocks.size() > 1)
std::sort(DFBlocks.begin(), DFBlocks.end());
for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
BasicBlock *BB = DFBlocks[i].second;
if (QueuePhiNode(BB, AllocaNum, CurrentVersion))
Info.DefiningBlocks.push_back(BB);
}
DFBlocks.clear();
} }
std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
SmallPtrSet<DomTreeNode*, 32> Visited;
SmallVector<DomTreeNode*, 32> Worklist;
while (!PQ.empty()) {
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
DomTreeNode *Root = RootPair.first;
unsigned RootLevel = RootPair.second;
// Walk all dominator tree children of Root, inspecting their CFG edges with
// targets elsewhere on the dominator tree. Only targets whose level is at
// most Root's level are added to the iterated dominance frontier of the
// definition set.
Worklist.clear();
Worklist.push_back(Root);
while (!Worklist.empty()) {
DomTreeNode *Node = Worklist.pop_back_val();
BasicBlock *BB = Node->getBlock();
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
++SI) {
DomTreeNode *SuccNode = DT.getNode(*SI);
// Quickly skip all CFG edges that are also dominator tree edges instead
// of catching them below.
if (SuccNode->getIDom() == Node)
continue;
unsigned SuccLevel = DomLevels[SuccNode];
if (SuccLevel > RootLevel)
continue;
if (!Visited.insert(SuccNode))
continue;
BasicBlock *SuccBB = SuccNode->getBlock();
if (!LiveInBlocks.count(SuccBB))
continue;
DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
if (!DefBlocks.count(SuccBB))
PQ.push(std::make_pair(SuccNode, SuccLevel));
}
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
++CI) {
if (!Visited.count(*CI))
Worklist.push_back(*CI);
}
}
}
if (DFBlocks.size() > 1)
std::sort(DFBlocks.begin(), DFBlocks.end());
unsigned CurrentVersion = 0;
for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
} }
/// RewriteSingleStoreAlloca - If there is only a single store to this value, /// RewriteSingleStoreAlloca - If there is only a single store to this value,
@ -1040,10 +1110,9 @@ NextIteration:
/// made to the IR. /// made to the IR.
/// ///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, DominanceFrontier &DF, DominatorTree &DT, AliasSetTracker *AST) {
AliasSetTracker *AST) {
// If there is nothing to do, bail out... // If there is nothing to do, bail out...
if (Allocas.empty()) return; if (Allocas.empty()) return;
PromoteMem2Reg(Allocas, DT, DF, AST).run(); PromoteMem2Reg(Allocas, DT, AST).run();
} }