misched: ILP scheduler for experimental heuristics.
llvm-svn: 165950
This commit is contained in:
parent
523623c239
commit
90f711da9a
|
@ -110,6 +110,10 @@ public:
|
||||||
/// Initialize the strategy after building the DAG for a new region.
|
/// Initialize the strategy after building the DAG for a new region.
|
||||||
virtual void initialize(ScheduleDAGMI *DAG) = 0;
|
virtual void initialize(ScheduleDAGMI *DAG) = 0;
|
||||||
|
|
||||||
|
/// Notify this strategy that all roots have been released (including those
|
||||||
|
/// that depend on EntrySU or ExitSU).
|
||||||
|
virtual void registerRoots() {}
|
||||||
|
|
||||||
/// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
|
/// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
|
||||||
/// schedule the node at the top of the unscheduled region. Otherwise it will
|
/// schedule the node at the top of the unscheduled region. Otherwise it will
|
||||||
/// be scheduled at the bottom.
|
/// be scheduled at the bottom.
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Definition of an ILP metric for machine level instruction scheduling.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
|
||||||
|
#define LLVM_CODEGEN_SCHEDULEDAGILP_H
|
||||||
|
|
||||||
|
#include "llvm/Support/DataTypes.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class raw_ostream;
|
||||||
|
class ScheduleDAGInstrs;
|
||||||
|
class SUnit;
|
||||||
|
|
||||||
|
/// \brief Represent the ILP of the subDAG rooted at a DAG node.
|
||||||
|
struct ILPValue {
|
||||||
|
unsigned InstrCount;
|
||||||
|
unsigned Cycles;
|
||||||
|
|
||||||
|
ILPValue(): InstrCount(0), Cycles(0) {}
|
||||||
|
|
||||||
|
ILPValue(unsigned count, unsigned cycles):
|
||||||
|
InstrCount(count), Cycles(cycles) {}
|
||||||
|
|
||||||
|
bool isValid() const { return Cycles > 0; }
|
||||||
|
|
||||||
|
// Order by the ILP metric's value.
|
||||||
|
bool operator<(ILPValue RHS) const {
|
||||||
|
return (uint64_t)InstrCount * RHS.Cycles
|
||||||
|
< (uint64_t)Cycles * RHS.InstrCount;
|
||||||
|
}
|
||||||
|
bool operator>(ILPValue RHS) const {
|
||||||
|
return RHS < *this;
|
||||||
|
}
|
||||||
|
bool operator<=(ILPValue RHS) const {
|
||||||
|
return (uint64_t)InstrCount * RHS.Cycles
|
||||||
|
<= (uint64_t)Cycles * RHS.InstrCount;
|
||||||
|
}
|
||||||
|
bool operator>=(ILPValue RHS) const {
|
||||||
|
return RHS <= *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||||
|
void print(raw_ostream &OS) const;
|
||||||
|
|
||||||
|
void dump() const;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
/// \brief Compute the values of each DAG node for an ILP metric.
|
||||||
|
///
|
||||||
|
/// This metric assumes that the DAG is a forest of trees with roots at the
|
||||||
|
/// bottom of the schedule.
|
||||||
|
class ScheduleDAGILP {
|
||||||
|
bool IsBottomUp;
|
||||||
|
std::vector<ILPValue> ILPValues;
|
||||||
|
|
||||||
|
public:
|
||||||
|
ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
|
||||||
|
|
||||||
|
/// \brief Initialize the result data with the size of the DAG.
|
||||||
|
void resize(unsigned NumSUnits);
|
||||||
|
|
||||||
|
/// \brief Compute the ILP metric for the subDAG at this root.
|
||||||
|
void computeILP(const SUnit *Root);
|
||||||
|
|
||||||
|
/// \brief Get the ILP value for a DAG node.
|
||||||
|
ILPValue getILP(const SUnit *SU);
|
||||||
|
};
|
||||||
|
|
||||||
|
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
|
#endif
|
|
@ -18,6 +18,7 @@
|
||||||
#include "llvm/CodeGen/MachineScheduler.h"
|
#include "llvm/CodeGen/MachineScheduler.h"
|
||||||
#include "llvm/CodeGen/Passes.h"
|
#include "llvm/CodeGen/Passes.h"
|
||||||
#include "llvm/CodeGen/RegisterClassInfo.h"
|
#include "llvm/CodeGen/RegisterClassInfo.h"
|
||||||
|
#include "llvm/CodeGen/ScheduleDAGILP.h"
|
||||||
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
@ -451,26 +452,6 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release all DAG roots for scheduling.
|
|
||||||
void ScheduleDAGMI::releaseRoots() {
|
|
||||||
SmallVector<SUnit*, 16> BotRoots;
|
|
||||||
|
|
||||||
for (std::vector<SUnit>::iterator
|
|
||||||
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
|
|
||||||
// A SUnit is ready to top schedule if it has no predecessors.
|
|
||||||
if (I->Preds.empty())
|
|
||||||
SchedImpl->releaseTopNode(&(*I));
|
|
||||||
// A SUnit is ready to bottom schedule if it has no successors.
|
|
||||||
if (I->Succs.empty())
|
|
||||||
BotRoots.push_back(&(*I));
|
|
||||||
}
|
|
||||||
// Release bottom roots in reverse order so the higher priority nodes appear
|
|
||||||
// first. This is more natural and slightly more efficient.
|
|
||||||
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
|
|
||||||
I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
|
|
||||||
SchedImpl->releaseBottomNode(*I);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// schedule - Called back from MachineScheduler::runOnMachineFunction
|
/// schedule - Called back from MachineScheduler::runOnMachineFunction
|
||||||
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
|
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
|
||||||
/// only includes instructions that have DAG nodes, not scheduling boundaries.
|
/// only includes instructions that have DAG nodes, not scheduling boundaries.
|
||||||
|
@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Release all DAG roots for scheduling.
|
||||||
|
void ScheduleDAGMI::releaseRoots() {
|
||||||
|
SmallVector<SUnit*, 16> BotRoots;
|
||||||
|
|
||||||
|
for (std::vector<SUnit>::iterator
|
||||||
|
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
|
||||||
|
// A SUnit is ready to top schedule if it has no predecessors.
|
||||||
|
if (I->Preds.empty())
|
||||||
|
SchedImpl->releaseTopNode(&(*I));
|
||||||
|
// A SUnit is ready to bottom schedule if it has no successors.
|
||||||
|
if (I->Succs.empty())
|
||||||
|
BotRoots.push_back(&(*I));
|
||||||
|
}
|
||||||
|
// Release bottom roots in reverse order so the higher priority nodes appear
|
||||||
|
// first. This is more natural and slightly more efficient.
|
||||||
|
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
|
||||||
|
I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
|
||||||
|
SchedImpl->releaseBottomNode(*I);
|
||||||
|
}
|
||||||
|
|
||||||
/// Identify DAG roots and setup scheduler queues.
|
/// Identify DAG roots and setup scheduler queues.
|
||||||
void ScheduleDAGMI::initQueues() {
|
void ScheduleDAGMI::initQueues() {
|
||||||
|
|
||||||
// Initialize the strategy before modifying the DAG.
|
// Initialize the strategy before modifying the DAG.
|
||||||
SchedImpl->initialize(this);
|
SchedImpl->initialize(this);
|
||||||
|
|
||||||
|
@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() {
|
||||||
// Release all DAG roots for scheduling.
|
// Release all DAG roots for scheduling.
|
||||||
releaseRoots();
|
releaseRoots();
|
||||||
|
|
||||||
|
SchedImpl->registerRoots();
|
||||||
|
|
||||||
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
|
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
|
||||||
CurrentBottom = RegionEnd;
|
CurrentBottom = RegionEnd;
|
||||||
}
|
}
|
||||||
|
@ -1197,6 +1201,86 @@ static MachineSchedRegistry
|
||||||
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
|
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
|
||||||
createConvergingSched);
|
createConvergingSched);
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// ILP Scheduler. Currently for experimental analysis of heuristics.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/// \brief Order nodes by the ILP metric.
|
||||||
|
struct ILPOrder {
|
||||||
|
ScheduleDAGILP *ILP;
|
||||||
|
bool MaximizeILP;
|
||||||
|
|
||||||
|
ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
|
||||||
|
|
||||||
|
/// \brief Apply a less-than relation on node priority.
|
||||||
|
bool operator()(const SUnit *A, const SUnit *B) const {
|
||||||
|
// Return true if A comes after B in the Q.
|
||||||
|
if (MaximizeILP)
|
||||||
|
return ILP->getILP(A) < ILP->getILP(B);
|
||||||
|
else
|
||||||
|
return ILP->getILP(A) > ILP->getILP(B);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// \brief Schedule based on the ILP metric.
|
||||||
|
class ILPScheduler : public MachineSchedStrategy {
|
||||||
|
ScheduleDAGILP ILP;
|
||||||
|
ILPOrder Cmp;
|
||||||
|
|
||||||
|
std::vector<SUnit*> ReadyQ;
|
||||||
|
public:
|
||||||
|
ILPScheduler(bool MaximizeILP)
|
||||||
|
: ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
|
||||||
|
|
||||||
|
virtual void initialize(ScheduleDAGMI *DAG) {
|
||||||
|
ReadyQ.clear();
|
||||||
|
ILP.resize(DAG->SUnits.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void registerRoots() {
|
||||||
|
for (std::vector<SUnit*>::const_iterator
|
||||||
|
I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
|
||||||
|
ILP.computeILP(*I);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Implement MachineSchedStrategy interface.
|
||||||
|
/// -----------------------------------------
|
||||||
|
|
||||||
|
virtual SUnit *pickNode(bool &IsTopNode) {
|
||||||
|
if (ReadyQ.empty()) return NULL;
|
||||||
|
pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
||||||
|
SUnit *SU = ReadyQ.back();
|
||||||
|
ReadyQ.pop_back();
|
||||||
|
IsTopNode = false;
|
||||||
|
DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
|
||||||
|
<< " ILP: " << ILP.getILP(SU) << '\n');
|
||||||
|
return SU;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void schedNode(SUnit *, bool) {}
|
||||||
|
|
||||||
|
virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
|
||||||
|
|
||||||
|
virtual void releaseBottomNode(SUnit *SU) {
|
||||||
|
ReadyQ.push_back(SU);
|
||||||
|
std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
|
||||||
|
return new ScheduleDAGMI(C, new ILPScheduler(true));
|
||||||
|
}
|
||||||
|
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
|
||||||
|
return new ScheduleDAGMI(C, new ILPScheduler(false));
|
||||||
|
}
|
||||||
|
static MachineSchedRegistry ILPMaxRegistry(
|
||||||
|
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
|
||||||
|
static MachineSchedRegistry ILPMinRegistry(
|
||||||
|
"ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Machine Instruction Shuffler for Correctness Testing
|
// Machine Instruction Shuffler for Correctness Testing
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||||
#include "llvm/CodeGen/RegisterPressure.h"
|
#include "llvm/CodeGen/RegisterPressure.h"
|
||||||
|
#include "llvm/CodeGen/ScheduleDAGILP.h"
|
||||||
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
||||||
#include "llvm/MC/MCInstrItineraries.h"
|
#include "llvm/MC/MCInstrItineraries.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
@ -30,6 +31,7 @@
|
||||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/Format.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include "llvm/ADT/SmallSet.h"
|
#include "llvm/ADT/SmallSet.h"
|
||||||
#include "llvm/ADT/SmallPtrSet.h"
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
|
@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
|
||||||
std::string ScheduleDAGInstrs::getDAGName() const {
|
std::string ScheduleDAGInstrs::getDAGName() const {
|
||||||
return "dag." + BB->getFullName();
|
return "dag." + BB->getFullName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/// \brief Manage the stack used by a reverse depth-first search over the DAG.
|
||||||
|
class SchedDAGReverseDFS {
|
||||||
|
std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
|
||||||
|
public:
|
||||||
|
bool isComplete() const { return DFSStack.empty(); }
|
||||||
|
|
||||||
|
void follow(const SUnit *SU) {
|
||||||
|
DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
|
||||||
|
}
|
||||||
|
void advance() { ++DFSStack.back().second; }
|
||||||
|
|
||||||
|
void backtrack() { DFSStack.pop_back(); }
|
||||||
|
|
||||||
|
const SUnit *getCurr() const { return DFSStack.back().first; }
|
||||||
|
|
||||||
|
SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
|
||||||
|
|
||||||
|
SUnit::const_pred_iterator getPredEnd() const {
|
||||||
|
return getCurr()->Preds.end();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // anonymous
|
||||||
|
|
||||||
|
void ScheduleDAGILP::resize(unsigned NumSUnits) {
|
||||||
|
ILPValues.resize(NumSUnits);
|
||||||
|
}
|
||||||
|
|
||||||
|
ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
|
||||||
|
return ILPValues[SU->NodeNum];
|
||||||
|
}
|
||||||
|
|
||||||
|
// A leaf node has an ILP of 1/1.
|
||||||
|
static ILPValue initILP(const SUnit *SU) {
|
||||||
|
unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
|
||||||
|
return ILPValue(Cnt, 1 + SU->getDepth());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
|
||||||
|
/// search from this root.
|
||||||
|
void ScheduleDAGILP::computeILP(const SUnit *Root) {
|
||||||
|
if (!IsBottomUp)
|
||||||
|
llvm_unreachable("Top-down ILP metric is unimplemnted");
|
||||||
|
|
||||||
|
SchedDAGReverseDFS DFS;
|
||||||
|
// Mark a node visited by validating it.
|
||||||
|
ILPValues[Root->NodeNum] = initILP(Root);
|
||||||
|
DFS.follow(Root);
|
||||||
|
for (;;) {
|
||||||
|
// Traverse the leftmost path as far as possible.
|
||||||
|
while (DFS.getPred() != DFS.getPredEnd()) {
|
||||||
|
const SUnit *PredSU = DFS.getPred()->getSUnit();
|
||||||
|
DFS.advance();
|
||||||
|
// If the pred is already valid, skip it.
|
||||||
|
if (ILPValues[PredSU->NodeNum].isValid())
|
||||||
|
continue;
|
||||||
|
ILPValues[PredSU->NodeNum] = initILP(PredSU);
|
||||||
|
DFS.follow(PredSU);
|
||||||
|
}
|
||||||
|
// Visit the top of the stack in postorder and backtrack.
|
||||||
|
unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
|
||||||
|
DFS.backtrack();
|
||||||
|
if (DFS.isComplete())
|
||||||
|
break;
|
||||||
|
// Add the recently finished predecessor's bottom-up descendent count.
|
||||||
|
ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||||
|
void ILPValue::print(raw_ostream &OS) const {
|
||||||
|
if (!isValid())
|
||||||
|
OS << "BADILP";
|
||||||
|
OS << InstrCount << " / " << Cycles << " = "
|
||||||
|
<< format("%g", ((double)InstrCount / Cycles));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ILPValue::dump() const {
|
||||||
|
dbgs() << *this << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
|
||||||
|
Val.print(OS);
|
||||||
|
return OS;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace llvm
|
||||||
|
#endif // !NDEBUG || LLVM_ENABLE_DUMP
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
|
||||||
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
|
||||||
|
;
|
||||||
|
; Basic verification of the ScheduleDAGILP metric.
|
||||||
|
;
|
||||||
|
; MAX: addss
|
||||||
|
; MAX: addss
|
||||||
|
; MAX: addss
|
||||||
|
; MAX: subss
|
||||||
|
; MAX: addss
|
||||||
|
;
|
||||||
|
; MIN: addss
|
||||||
|
; MIN: addss
|
||||||
|
; MIN: subss
|
||||||
|
; MIN: addss
|
||||||
|
; MIN: addss
|
||||||
|
define float @ilpsched(float %a, float %b, float %c, float %d, float %e, float %f) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%add = fadd float %a, %b
|
||||||
|
%add1 = fadd float %c, %d
|
||||||
|
%add2 = fadd float %e, %f
|
||||||
|
%add3 = fsub float %add1, %add2
|
||||||
|
%add4 = fadd float %add, %add3
|
||||||
|
ret float %add4
|
||||||
|
}
|
Loading…
Reference in New Issue