switch the SUnit pred/succ sets from being std::sets to being smallvectors.

This reduces selectiondag time on kc++ from 5.43s to 4.98s (9%).  More
significantly, this speeds up the default ppc scheduler from ~1571ms to 1063ms,
a 33% speedup.

llvm-svn: 29743
This commit is contained in:
Chris Lattner 2006-08-17 00:09:56 +00:00
parent 0742d58864
commit d86418ab20
4 changed files with 112 additions and 81 deletions

View File

@ -82,9 +82,16 @@ namespace llvm {
// Preds/Succs - The SUnits before/after us in the graph. The boolean value
// is true if the edge is a token chain edge, false if it is a value edge.
std::set<std::pair<SUnit*,bool> > Preds; // All sunit predecessors.
std::set<std::pair<SUnit*,bool> > Succs; // All sunit successors.
SmallVector<std::pair<SUnit*,bool>, 4> Preds; // All sunit predecessors.
SmallVector<std::pair<SUnit*,bool>, 4> Succs; // All sunit successors.
typedef SmallVector<std::pair<SUnit*,bool>, 4>::iterator pred_iterator;
typedef SmallVector<std::pair<SUnit*,bool>, 4>::iterator succ_iterator;
typedef SmallVector<std::pair<SUnit*,bool>, 4>::const_iterator
const_pred_iterator;
typedef SmallVector<std::pair<SUnit*,bool>, 4>::const_iterator
const_succ_iterator;
short NumPreds; // # of preds.
short NumSuccs; // # of succs.
short NumPredsLeft; // # of preds not scheduled.
@ -111,6 +118,26 @@ namespace llvm {
Latency(0), CycleBound(0), Cycle(0), Depth(0), Height(0),
NodeNum(nodenum) {}
/// addPred - Record N as a predecessor of this node, reached over an edge of
/// the given kind (isChain is true for a token chain edge, false for a value
/// edge). An identical (node, kind) entry is never stored twice. Returns true
/// if the edge was newly added, false if it was already present.
bool addPred(SUnit *N, bool isChain) {
  const std::pair<SUnit*, bool> Edge(N, isChain);
  // Linear scan for an existing identical edge.
  for (pred_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I)
    if (*I == Edge)
      return false;
  Preds.push_back(Edge);
  return true;
}
/// addSucc - Record N as a successor of this node, reached over an edge of
/// the given kind (isChain is true for a token chain edge, false for a value
/// edge). An identical (node, kind) entry is never stored twice. Returns true
/// if the edge was newly added, false if it was already present.
bool addSucc(SUnit *N, bool isChain) {
  const std::pair<SUnit*, bool> Edge(N, isChain);
  // Linear scan for an existing identical edge.
  for (succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I)
    if (*I == Edge)
      return false;
  Succs.push_back(Edge);
  return true;
}
void dump(const SelectionDAG *G) const;
void dumpAll(const SelectionDAG *G) const;
};
@ -127,7 +154,7 @@ namespace llvm {
public:
virtual ~SchedulingPriorityQueue() {}
virtual void initNodes(const std::vector<SUnit> &SUnits) = 0;
virtual void initNodes(std::vector<SUnit> &SUnits) = 0;
virtual void releaseState() = 0;
virtual bool empty() const = 0;

View File

@ -84,7 +84,7 @@ void ScheduleDAG::BuildSchedUnits() {
N = *UI;
break;
}
if (!HasFlagUse) break;
if (!HasFlagUse) break;
}
// Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
@ -150,7 +150,7 @@ void ScheduleDAG::BuildSchedUnits() {
assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) {
if (SU->addPred(OpSU, isChain)) {
if (!isChain) {
SU->NumPreds++;
SU->NumPredsLeft++;
@ -158,7 +158,7 @@ void ScheduleDAG::BuildSchedUnits() {
SU->NumChainPredsLeft++;
}
}
if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) {
if (OpSU->addSucc(SU, isChain)) {
if (!isChain) {
OpSU->NumSuccs++;
OpSU->NumSuccsLeft++;
@ -176,35 +176,35 @@ void ScheduleDAG::BuildSchedUnits() {
return;
}
static void CalculateDepths(SUnit *SU, unsigned Depth) {
if (SU->Depth == 0 || Depth > SU->Depth) {
SU->Depth = Depth;
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
E = SU->Succs.end(); I != E; ++I)
CalculateDepths(I->first, Depth+1);
/// CalculateDepths - Assign Depth to SU if SU's recorded depth is still zero
/// or is smaller than Depth, then recurse into every successor with Depth+1.
/// Nodes whose recorded depth is nonzero and already >= Depth are left alone.
static void CalculateDepths(SUnit &SU, unsigned Depth) {
  // A nonzero recorded depth that is at least as large wins: stop here.
  if (SU.Depth != 0 && Depth <= SU.Depth)
    return;

  SU.Depth = Depth;
  const unsigned SuccDepth = Depth+1;
  for (SUnit::succ_iterator SI = SU.Succs.begin(), SE = SU.Succs.end();
       SI != SE; ++SI)
    CalculateDepths(*SI->first, SuccDepth);
}
void ScheduleDAG::CalculateDepths() {
SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
::CalculateDepths(Entry, 0U);
::CalculateDepths(*Entry, 0U);
for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
::CalculateDepths(&SUnits[i], 0U);
::CalculateDepths(SUnits[i], 0U);
}
}
static void CalculateHeights(SUnit *SU, unsigned Height) {
if (SU->Height == 0 || Height > SU->Height) {
SU->Height = Height;
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I)
CalculateHeights(I->first, Height+1);
/// CalculateHeights - Assign Height to SU if SU's recorded height is still
/// zero or is smaller than Height, then recurse into every predecessor with
/// Height+1. Nodes whose recorded height is nonzero and already >= Height are
/// left alone.
static void CalculateHeights(SUnit &SU, unsigned Height) {
  // A nonzero recorded height that is at least as large wins: stop here.
  if (SU.Height != 0 && Height <= SU.Height)
    return;

  SU.Height = Height;
  const unsigned PredHeight = Height+1;
  for (SUnit::pred_iterator PI = SU.Preds.begin(), PE = SU.Preds.end();
       PI != PE; ++PI)
    CalculateHeights(*PI->first, PredHeight);
}
void ScheduleDAG::CalculateHeights() {
SUnit *Root = SUnitMap[DAG.getRoot().Val];
::CalculateHeights(Root, 0U);
::CalculateHeights(*Root, 0U);
}
/// CountResults - The results of target nodes have register or immediate
@ -646,24 +646,24 @@ void SUnit::dumpAll(const SelectionDAG *G) const {
if (Preds.size() != 0) {
std::cerr << " Predecessors:\n";
for (std::set<std::pair<SUnit*,bool> >::const_iterator I = Preds.begin(),
E = Preds.end(); I != E; ++I) {
for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
if (I->second)
std::cerr << " ch ";
std::cerr << " ch #";
else
std::cerr << " val ";
I->first->dump(G);
std::cerr << " val #";
std::cerr << I->first << "\n";
}
}
if (Succs.size() != 0) {
std::cerr << " Successors:\n";
for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Succs.begin(),
E = Succs.end(); I != E; ++I) {
for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
I != E; ++I) {
if (I->second)
std::cerr << " ch ";
std::cerr << " ch #";
else
std::cerr << " val ";
I->first->dump(G);
std::cerr << " val #";
std::cerr << I->first << "\n";
}
}
std::cerr << "\n";

View File

@ -132,15 +132,16 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
// available. This is the max of the start time of all predecessors plus
// their latencies.
unsigned AvailableCycle = 0;
for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(),
for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
E = SuccSU->Preds.end(); I != E; ++I) {
// If this is a token edge, we don't need to wait for the latency of the
// preceding instruction (e.g. a long-latency load) unless there is also
// some other data dependence.
unsigned PredDoneCycle = I->first->Cycle;
SUnit &Pred = *I->first;
unsigned PredDoneCycle = Pred.Cycle;
if (!I->second)
PredDoneCycle += I->first->Latency;
else if (I->first->Latency)
PredDoneCycle += Pred.Latency;
else if (Pred.Latency)
PredDoneCycle += 1;
AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
@ -161,8 +162,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
SU->Cycle = CurCycle;
// Bottom up: release successors.
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
E = SU->Succs.end(); I != E; ++I)
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I)
ReleaseSucc(I->first, I->second);
}
@ -313,7 +314,7 @@ namespace {
namespace {
class LatencyPriorityQueue : public SchedulingPriorityQueue {
// SUnits - The SUnits for the current graph.
const std::vector<SUnit> *SUnits;
std::vector<SUnit> *SUnits;
// Latencies - The latency (max of latency from this node to the bb exit)
// for each node.
@ -330,7 +331,7 @@ public:
LatencyPriorityQueue() : Queue(latency_sort(this)) {
}
void initNodes(const std::vector<SUnit> &sunits) {
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Calculate node priorities.
CalculatePriorities();
@ -379,6 +380,7 @@ private:
void CalculatePriorities();
int CalcLatency(const SUnit &SU);
void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
SUnit *getSingleUnscheduledPred(SUnit *SU);
/// RemoveFromPriorityQueue - This is a really inefficient way to remove a
/// node from a priority queue. We should roll our own heap to make this
@ -434,8 +436,8 @@ int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
return Latency;
int MaxSuccLatency = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(),
E = SU.Succs.end(); I != E; ++I)
for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
I != E; ++I)
MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
return Latency = MaxSuccLatency + SU.Latency;
@ -452,17 +454,19 @@ void LatencyPriorityQueue::CalculatePriorities() {
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
static SUnit *getSingleUnscheduledPred(SUnit *SU) {
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I)
if (!I->first->isScheduled) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit &Pred = *I->first;
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailablePred && OnlyAvailablePred != I->first)
if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
return 0;
OnlyAvailablePred = I->first;
OnlyAvailablePred = &Pred;
}
}
return OnlyAvailablePred;
}
@ -471,8 +475,8 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
E = SU->Succs.end(); I != E; ++I)
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I)
if (getSingleUnscheduledPred(I->first) == SU)
++NumNodesBlocking;
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
@ -486,8 +490,8 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
// single predecessor has a higher priority, since scheduling it will make
// the node available.
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
E = SU->Succs.end(); I != E; ++I)
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I)
AdjustPriorityOfUnscheduledPreds(I->first);
}

View File

@ -93,7 +93,7 @@ void ScheduleDAGRRList::Schedule() {
CalculateDepths();
CalculateHeights();
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(&DAG));
SUnits[su].dumpAll(&DAG));
AvailableQueue->initNodes(SUnits);
@ -143,8 +143,8 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() {
}
}
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (!I->second)
OperandSeen.insert(I->first);
}
@ -235,8 +235,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
Sequence.push_back(SU);
// Bottom up: release predecessors
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I)
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I)
ReleasePred(I->first, I->second, CurCycle);
SU->isScheduled = true;
}
@ -347,8 +347,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
Sequence.push_back(SU);
// Top down: release successors
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
E = SU->Succs.end(); I != E; ++I)
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I)
ReleaseSucc(I->first, I->second, CurCycle);
SU->isScheduled = true;
}
@ -448,7 +448,7 @@ namespace {
RegReductionPriorityQueue() :
Queue(SF(this)) {}
virtual void initNodes(const std::vector<SUnit> &sunits) {}
virtual void initNodes(std::vector<SUnit> &sunits) {}
virtual void releaseState() {}
virtual int getSethiUllmanNumber(unsigned NodeNum) const {
@ -485,7 +485,7 @@ namespace {
public:
BURegReductionPriorityQueue() {}
void initNodes(const std::vector<SUnit> &sunits) {
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
@ -521,7 +521,7 @@ namespace {
public:
TDRegReductionPriorityQueue() {}
void initNodes(const std::vector<SUnit> &sunits) {
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Calculate node priorities.
CalculatePriorities();
@ -548,8 +548,8 @@ static bool isFloater(const SUnit *SU) {
if (SU->NumPreds == 0)
return true;
if (SU->NumPreds == 1) {
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->second) continue;
SUnit *PredSU = I->first;
@ -566,8 +566,8 @@ static bool isFloater(const SUnit *SU) {
static bool isSimpleFloaterUse(const SUnit *SU) {
unsigned NumOps = 0;
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->second) continue;
if (++NumOps > 1)
return false;
@ -641,8 +641,8 @@ static void isReachable(SUnit *SU, SUnit *TargetSU,
}
if (!Visited.insert(SU).second) return;
for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
E = SU->Preds.end(); I != E; ++I)
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
++I)
isReachable(I->first, TargetSU, Visited, Reached);
}
@ -655,8 +655,8 @@ static bool isReachable(SUnit *SU, SUnit *TargetSU) {
static SUnit *getDefUsePredecessor(SUnit *SU) {
SDNode *DU = SU->Node->getOperand(0).Val;
for (std::set<std::pair<SUnit*, bool> >::iterator
I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->second) continue; // ignore chain preds
SUnit *PredSU = I->first;
if (PredSU->Node == DU)
@ -689,8 +689,8 @@ void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
SUnit *DUSU = getDefUsePredecessor(SU);
if (!DUSU) continue;
for (std::set<std::pair<SUnit*, bool> >::iterator I = DUSU->Succs.begin(),
E = DUSU->Succs.end(); I != E; ++I) {
for (SUnit::succ_iterator I = DUSU->Succs.begin(), E = DUSU->Succs.end();
I != E; ++I) {
if (I->second) continue;
SUnit *SuccSU = I->first;
if (SuccSU != SU &&
@ -699,9 +699,9 @@ void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum
<< " to SU #" << SuccSU->NodeNum << "\n");
if (SU->Preds.insert(std::make_pair(SuccSU, true)).second)
if (SU->addPred(SuccSU, true))
SU->NumChainPredsLeft++;
if (SuccSU->Succs.insert(std::make_pair(SU, true)).second)
if (SuccSU->addSucc(SU, true))
SuccSU->NumChainSuccsLeft++;
}
}
@ -734,8 +734,8 @@ int BURegReductionPriorityQueue<SF>::CalcNodePriority(const SUnit *SU) {
SethiUllmanNumber = INT_MAX - 10;
else {
int Extra = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator
I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->second) continue; // ignore chain preds
SUnit *PredSU = I->first;
int PredSethiUllman = CalcNodePriority(PredSU);
@ -763,11 +763,11 @@ void BURegReductionPriorityQueue<SF>::CalculatePriorities() {
static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
unsigned Sum = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator
I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
SUnit *SuccSU = I->first;
for (std::set<std::pair<SUnit*, bool> >::const_iterator
II = SuccSU->Preds.begin(), EE = SuccSU->Preds.end(); II != EE; ++II) {
for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
EE = SuccSU->Preds.end(); II != EE; ++II) {
SUnit *PredSU = II->first;
if (!PredSU->isScheduled)
Sum++;
@ -855,8 +855,8 @@ int TDRegReductionPriorityQueue<SF>::CalcNodePriority(const SUnit *SU) {
SethiUllmanNumber = 1;
else {
int Extra = 0;
for (std::set<std::pair<SUnit*, bool> >::const_iterator
I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->second) continue; // ignore chain preds
SUnit *PredSU = I->first;
int PredSethiUllman = CalcNodePriority(PredSU);