From d86418ab20cc59b5ef2755a973121f0c89faaa8f Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 17 Aug 2006 00:09:56 +0000 Subject: [PATCH] switch the SUnit pred/succ sets from being std::sets to being smallvectors. This reduces selectiondag time on kc++ from 5.43s to 4.98s (9%). More significantly, this speeds up the default ppc scheduler from ~1571ms to 1063ms, a 33% speedup. llvm-svn: 29743 --- llvm/include/llvm/CodeGen/ScheduleDAG.h | 33 +++++++++- llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp | 56 ++++++++--------- .../CodeGen/SelectionDAG/ScheduleDAGList.cpp | 44 +++++++------- .../SelectionDAG/ScheduleDAGRRList.cpp | 60 +++++++++---------- 4 files changed, 112 insertions(+), 81 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index ed6ccd0a6414..5d85d374338d 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -82,9 +82,16 @@ namespace llvm { // Preds/Succs - The SUnits before/after us in the graph. The boolean value // is true if the edge is a token chain edge, false if it is a value edge. - std::set<std::pair<SUnit*, bool> > Preds; // All sunit predecessors. - std::set<std::pair<SUnit*, bool> > Succs; // All sunit successors. + SmallVector<std::pair<SUnit*, bool>, 4> Preds; // All sunit predecessors. + SmallVector<std::pair<SUnit*, bool>, 4> Succs; // All sunit successors. + typedef SmallVector<std::pair<SUnit*, bool>, 4>::iterator pred_iterator; + typedef SmallVector<std::pair<SUnit*, bool>, 4>::iterator succ_iterator; + typedef SmallVector<std::pair<SUnit*, bool>, 4>::const_iterator + const_pred_iterator; + typedef SmallVector<std::pair<SUnit*, bool>, 4>::const_iterator + const_succ_iterator; + short NumPreds; // # of preds. short NumSuccs; // # of sucss. short NumPredsLeft; // # of preds not scheduled. @@ -111,6 +118,26 @@ namespace llvm { Latency(0), CycleBound(0), Cycle(0), Depth(0), Height(0), NodeNum(nodenum) {} + /// addPred - This adds the specified node as a pred of the current node if + /// not already. This returns true if this is a new pred.
+ bool addPred(SUnit *N, bool isChain) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + if (Preds[i].first == N && Preds[i].second == isChain) + return false; + Preds.push_back(std::make_pair(N, isChain)); + return true; + } + + /// addSucc - This adds the specified node as a succ of the current node if + /// not already. This returns true if this is a new succ. + bool addSucc(SUnit *N, bool isChain) { + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + if (Succs[i].first == N && Succs[i].second == isChain) + return false; + Succs.push_back(std::make_pair(N, isChain)); + return true; + } + void dump(const SelectionDAG *G) const; void dumpAll(const SelectionDAG *G) const; }; @@ -127,7 +154,7 @@ namespace llvm { public: virtual ~SchedulingPriorityQueue() {} - virtual void initNodes(const std::vector<SUnit> &SUnits) = 0; + virtual void initNodes(std::vector<SUnit> &SUnits) = 0; virtual void releaseState() = 0; virtual bool empty() const = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp index 2d2c34340b1c..d33e1ecef7ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp @@ -84,7 +84,7 @@ void ScheduleDAG::BuildSchedUnits() { N = *UI; break; } - if (!HasFlagUse) break; + if (!HasFlagUse) break; } // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
@@ -150,7 +150,7 @@ void ScheduleDAG::BuildSchedUnits() { assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; - if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) { + if (SU->addPred(OpSU, isChain)) { if (!isChain) { SU->NumPreds++; SU->NumPredsLeft++; @@ -158,7 +158,7 @@ void ScheduleDAG::BuildSchedUnits() { SU->NumChainPredsLeft++; } } - if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) { + if (OpSU->addSucc(SU, isChain)) { if (!isChain) { OpSU->NumSuccs++; OpSU->NumSuccsLeft++; @@ -176,35 +176,35 @@ void ScheduleDAG::BuildSchedUnits() { return; } -static void CalculateDepths(SUnit *SU, unsigned Depth) { - if (SU->Depth == 0 || Depth > SU->Depth) { - SU->Depth = Depth; - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(), - E = SU->Succs.end(); I != E; ++I) - CalculateDepths(I->first, Depth+1); +static void CalculateDepths(SUnit &SU, unsigned Depth) { + if (SU.Depth == 0 || Depth > SU.Depth) { + SU.Depth = Depth; + for (SUnit::succ_iterator I = SU.Succs.begin(), E = SU.Succs.end(); + I != E; ++I) + CalculateDepths(*I->first, Depth+1); } } void ScheduleDAG::CalculateDepths() { SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; - ::CalculateDepths(Entry, 0U); + ::CalculateDepths(*Entry, 0U); for (unsigned i = 0, e = SUnits.size(); i != e; ++i) if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { - ::CalculateDepths(&SUnits[i], 0U); + ::CalculateDepths(SUnits[i], 0U); } } -static void CalculateHeights(SUnit *SU, unsigned Height) { - if (SU->Height == 0 || Height > SU->Height) { - SU->Height = Height; - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - CalculateHeights(I->first, Height+1); +static void CalculateHeights(SUnit &SU, unsigned Height) { + if (SU.Height == 0 || Height > SU.Height) { + SU.Height = Height; + for (SUnit::pred_iterator I = SU.Preds.begin(), E = SU.Preds.end(); + I != E; ++I) + CalculateHeights(*I->first, Height+1); } } void
ScheduleDAG::CalculateHeights() { SUnit *Root = SUnitMap[DAG.getRoot().Val]; - ::CalculateHeights(Root, 0U); + ::CalculateHeights(*Root, 0U); } /// CountResults - The results of target nodes have register or immediate @@ -646,24 +646,24 @@ void SUnit::dumpAll(const SelectionDAG *G) const { if (Preds.size() != 0) { std::cerr << " Predecessors:\n"; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Preds.begin(), - E = Preds.end(); I != E; ++I) { + for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { if (I->second) - std::cerr << " ch "; + std::cerr << " ch #"; else - std::cerr << " val "; - I->first->dump(G); + std::cerr << " val #"; + std::cerr << I->first << "\n"; } } if (Succs.size() != 0) { std::cerr << " Successors:\n"; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Succs.begin(), - E = Succs.end(); I != E; ++I) { + for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); + I != E; ++I) { if (I->second) - std::cerr << " ch "; + std::cerr << " ch #"; else - std::cerr << " val "; - I->first->dump(G); + std::cerr << " val #"; + std::cerr << I->first << "\n"; } } std::cerr << "\n"; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index aa62bd49fce2..15f04cd8b1c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -132,15 +132,16 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) { // available. This is the max of the start time of all predecessors plus // their latencies. unsigned AvailableCycle = 0; - for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(), + for (SUnit::pred_iterator I = SuccSU->Preds.begin(), E = SuccSU->Preds.end(); I != E; ++I) { // If this is a token edge, we don't need to wait for the latency of the // preceeding instruction (e.g. a long-latency load) unless there is also // some other data dependence.
- unsigned PredDoneCycle = I->first->Cycle; + SUnit &Pred = *I->first; + unsigned PredDoneCycle = Pred.Cycle; if (!I->second) - PredDoneCycle += I->first->Latency; - else if (I->first->Latency) + PredDoneCycle += Pred.Latency; + else if (Pred.Latency) PredDoneCycle += 1; AvailableCycle = std::max(AvailableCycle, PredDoneCycle); @@ -161,8 +162,8 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { SU->Cycle = CurCycle; // Bottom up: release successors. - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(), - E = SU->Succs.end(); I != E; ++I) + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) ReleaseSucc(I->first, I->second); } @@ -313,7 +314,7 @@ namespace { namespace { class LatencyPriorityQueue : public SchedulingPriorityQueue { // SUnits - The SUnits for the current graph. - const std::vector<SUnit> *SUnits; + std::vector<SUnit> *SUnits; // Latencies - The latency (max of latency from this node to the bb exit) // for each node. @@ -330,7 +331,7 @@ public: LatencyPriorityQueue() : Queue(latency_sort(this)) { } - void initNodes(const std::vector<SUnit> &sunits) { + void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Calculate node priorities. CalculatePriorities(); @@ -379,6 +380,7 @@ private: void CalculatePriorities(); int CalcLatency(const SUnit &SU); void AdjustPriorityOfUnscheduledPreds(SUnit *SU); + SUnit *getSingleUnscheduledPred(SUnit *SU); /// RemoveFromPriorityQueue - This is a really inefficient way to remove a /// node from a priority queue.
We should roll our own heap to make this @@ -434,8 +436,8 @@ int LatencyPriorityQueue::CalcLatency(const SUnit &SU) { return Latency; int MaxSuccLatency = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(), - E = SU.Succs.end(); I != E; ++I) + for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end(); + I != E; ++I) MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first)); return Latency = MaxSuccLatency + SU.Latency; @@ -452,17 +454,19 @@ void LatencyPriorityQueue::CalculatePriorities() { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledPred(SUnit *SU) { +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) - if (!I->first->isScheduled) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->first; + if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != I->first) + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) return 0; - OnlyAvailablePred = I->first; + OnlyAvailablePred = &Pred; } + } return OnlyAvailablePred; } @@ -471,8 +475,8 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(), - E = SU->Succs.end(); I != E; ++I) + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) if (getSingleUnscheduledPred(I->first) == SU) ++NumNodesBlocking; NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; @@ -486,8 +490,8 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { // single predecessor has a higher priority, since scheduling it will make // the node available. void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { - for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(), - E = SU->Succs.end(); I != E; ++I) + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) AdjustPriorityOfUnscheduledPreds(I->first); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 5d7fa5fad16b..f247f7ce7657 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -93,7 +93,7 @@ void ScheduleDAGRRList::Schedule() { CalculateDepths(); CalculateHeights(); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(&DAG)); + SUnits[su].dumpAll(&DAG)); AvailableQueue->initNodes(SUnits); @@ -143,8 +143,8 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { } } - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { if (!I->second) OperandSeen.insert(I->first); } @@ -235,8 +235,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { Sequence.push_back(SU); // Bottom up: release predecessors - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) ReleasePred(I->first, I->second, CurCycle);
SU->isScheduled = true; } @@ -347,8 +347,8 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { Sequence.push_back(SU); // Top down: release successors - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(), - E = SU->Succs.end(); I != E; ++I) + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) ReleaseSucc(I->first, I->second, CurCycle); SU->isScheduled = true; } @@ -448,7 +448,7 @@ namespace { RegReductionPriorityQueue() : Queue(SF(this)) {} - virtual void initNodes(const std::vector<SUnit> &sunits) {} + virtual void initNodes(std::vector<SUnit> &sunits) {} virtual void releaseState() {} virtual int getSethiUllmanNumber(unsigned NodeNum) const { @@ -485,7 +485,7 @@ namespace { public: BURegReductionPriorityQueue() {} - void initNodes(const std::vector<SUnit> &sunits) { + void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. AddPseudoTwoAddrDeps(); @@ -521,7 +521,7 @@ namespace { public: TDRegReductionPriorityQueue() {} - void initNodes(const std::vector<SUnit> &sunits) { + void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Calculate node priorities.
CalculatePriorities(); @@ -548,8 +548,8 @@ static bool isFloater(const SUnit *SU) { if (SU->NumPreds == 0) return true; if (SU->NumPreds == 1) { - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { if (I->second) continue; SUnit *PredSU = I->first; @@ -566,8 +566,8 @@ static bool isFloater(const SUnit *SU) { static bool isSimpleFloaterUse(const SUnit *SU) { unsigned NumOps = 0; - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { if (I->second) continue; if (++NumOps > 1) return false; @@ -641,8 +641,8 @@ static void isReachable(SUnit *SU, SUnit *TargetSU, } if (!Visited.insert(SU).second) return; - for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; + ++I) isReachable(I->first, TargetSU, Visited, Reached); } @@ -655,8 +655,8 @@ static bool isReachable(SUnit *SU, SUnit *TargetSU) { static SUnit *getDefUsePredecessor(SUnit *SU) { SDNode *DU = SU->Node->getOperand(0).Val; - for (std::set<std::pair<SUnit*, bool> >::iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { if (I->second) continue; // ignore chain preds SUnit *PredSU = I->first; if (PredSU->Node == DU) @@ -689,8 +689,8 @@ void BURegReductionPriorityQueue::AddPseudoTwoAddrDeps() { SUnit *DUSU = getDefUsePredecessor(SU); if (!DUSU) continue; - for (std::set<std::pair<SUnit*, bool> >::iterator I = DUSU->Succs.begin(), - E = DUSU->Succs.end(); I != E; ++I) { + for (SUnit::succ_iterator I = DUSU->Succs.begin(), E = DUSU->Succs.end(); + I != E; ++I) { if (I->second) continue; SUnit *SuccSU = I->first; if (SuccSU != SU && @@ -699,9 +699,9 @@ void
BURegReductionPriorityQueue::AddPseudoTwoAddrDeps() { if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) { DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - if (SU->Preds.insert(std::make_pair(SuccSU, true)).second) + if (SU->addPred(SuccSU, true)) SU->NumChainPredsLeft++; - if (SuccSU->Succs.insert(std::make_pair(SU, true)).second) + if (SuccSU->addSucc(SU, true)) SuccSU->NumChainSuccsLeft++; } } @@ -734,8 +734,8 @@ int BURegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { SethiUllmanNumber = INT_MAX - 10; else { int Extra = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { if (I->second) continue; // ignore chain preds SUnit *PredSU = I->first; int PredSethiUllman = CalcNodePriority(PredSU); @@ -763,11 +763,11 @@ void BURegReductionPriorityQueue::CalculatePriorities() { static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { unsigned Sum = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator - I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { SUnit *SuccSU = I->first; - for (std::set<std::pair<SUnit*, bool> >::const_iterator - II = SuccSU->Preds.begin(), EE = SuccSU->Preds.end(); II != EE; ++II) { + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { SUnit *PredSU = II->first; if (!PredSU->isScheduled) Sum++; @@ -855,8 +855,8 @@ int TDRegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { SethiUllmanNumber = 1; else { int Extra = 0; - for (std::set<std::pair<SUnit*, bool> >::const_iterator - I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { if (I->second) continue; // ignore chain preds SUnit *PredSU = I->first; int PredSethiUllman =
CalcNodePriority(PredSU);