[llvm-exegesis] Resolve variant classes in analysis.
Summary: See PR38884. Reviewers: gchatelet Subscribers: tschuett, RKSimon, llvm-commits Differential Revision: https://reviews.llvm.org/D52825 llvm-svn: 343680
This commit is contained in:
parent
d934032e48
commit
d5a39553ff
|
@ -0,0 +1,26 @@
|
|||
# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file="" -analysis-numpoints=1 | FileCheck %s
|
||||
|
||||
# CHECK: cluster_id,opcode_name,config,sched_class,SBPort0,SBPort1,SBPort23,SBPort4,SBPort5,NumMicroOps
|
||||
# CHECK-NEXT: SBWriteZeroLatency
|
||||
|
||||
---
|
||||
mode: uops
|
||||
key:
|
||||
instructions:
|
||||
- 'XOR32rr EAX EAX EAX'
|
||||
config: ''
|
||||
register_initial_values:
|
||||
cpu_name: sandybridge
|
||||
llvm_triple: x86_64-unknown-linux-gnu
|
||||
num_repetitions: 10000
|
||||
measurements:
|
||||
- { key: SBPort0, value: 0.0012, per_snippet_value: 0.0012 }
|
||||
- { key: SBPort1, value: 0.0021, per_snippet_value: 0.0021 }
|
||||
- { key: SBPort23, value: 0.0013, per_snippet_value: 0.0013 }
|
||||
- { key: SBPort4, value: 0.0018, per_snippet_value: 0.0018 }
|
||||
- { key: SBPort5, value: 0.0012, per_snippet_value: 0.0012 }
|
||||
- { key: NumMicroOps, value: 1.0108, per_snippet_value: 1.0108 }
|
||||
error: ''
|
||||
info: ''
|
||||
assembled_snippet: 31C031C031C031C031C031C031C031C031C031C031C031C031C031C031C031C0C3
|
||||
...
|
|
@ -19,6 +19,16 @@ namespace exegesis {
|
|||
|
||||
static const char kCsvSep = ',';
|
||||
|
||||
static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
|
||||
unsigned SchedClassId,
|
||||
const llvm::MCInst &MCI) {
|
||||
const auto &SM = STI.getSchedModel();
|
||||
while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
|
||||
SchedClassId =
|
||||
STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
|
||||
return SchedClassId;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
|
||||
|
@ -126,11 +136,12 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
|
|||
writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
|
||||
OS << kCsvSep;
|
||||
assert(!Point.Key.Instructions.empty());
|
||||
// FIXME: Resolve variant classes.
|
||||
const unsigned SchedClassId =
|
||||
InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
const auto &SchedModel = SubtargetInfo_->getSchedModel();
|
||||
const llvm::MCInst &MCI = Point.Key.Instructions[0];
|
||||
const unsigned SchedClassId = resolveSchedClassId(
|
||||
*SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
const llvm::MCSchedClassDesc *const SCDesc =
|
||||
SchedModel.getSchedClassDesc(SchedClassId);
|
||||
writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
|
||||
|
@ -193,21 +204,43 @@ Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
|
|||
return llvm::Error::success();
|
||||
}
|
||||
|
||||
std::unordered_map<unsigned, std::vector<size_t>>
|
||||
Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
|
||||
ResolvedSchedClass &&RSC)
|
||||
: RSC(std::move(RSC)) {}
|
||||
|
||||
std::vector<Analysis::ResolvedSchedClassAndPoints>
|
||||
Analysis::makePointsPerSchedClass() const {
|
||||
std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
|
||||
std::vector<ResolvedSchedClassAndPoints> Entries;
|
||||
// Maps SchedClassIds to index in result.
|
||||
std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
|
||||
const auto &Points = Clustering_.getPoints();
|
||||
for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
|
||||
const InstructionBenchmark &Point = Points[PointId];
|
||||
if (!Point.Error.empty())
|
||||
continue;
|
||||
assert(!Point.Key.Instructions.empty());
|
||||
const auto Opcode = Point.Key.Instructions[0].getOpcode();
|
||||
// FIXME: Resolve variant classes.
|
||||
PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
|
||||
PointId);
|
||||
// FIXME: we should be using the tuple of classes for instructions in the
|
||||
// snippet as key.
|
||||
const llvm::MCInst &MCI = Point.Key.Instructions[0];
|
||||
unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
|
||||
const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
|
||||
.getSchedClassDesc(SchedClassId)
|
||||
->isVariant();
|
||||
SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
|
||||
const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
|
||||
if (IndexIt == SchedClassIdToIndex.end()) {
|
||||
// Create a new entry.
|
||||
SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
|
||||
ResolvedSchedClassAndPoints Entry(
|
||||
ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
|
||||
Entry.PointIds.push_back(PointId);
|
||||
Entries.push_back(std::move(Entry));
|
||||
} else {
|
||||
// Append to the existing entry.
|
||||
Entries[IndexIt->second].PointIds.push_back(PointId);
|
||||
}
|
||||
}
|
||||
return PointsPerSchedClass;
|
||||
return Entries;
|
||||
}
|
||||
|
||||
// Uops repeat the same opcode over again. Just show this opcode and show the
|
||||
|
@ -239,8 +272,8 @@ writeLatencySnippetHtml(llvm::raw_ostream &OS,
|
|||
}
|
||||
|
||||
void Analysis::printSchedClassClustersHtml(
|
||||
const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
|
||||
llvm::raw_ostream &OS) const {
|
||||
const std::vector<SchedClassCluster> &Clusters,
|
||||
const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
|
||||
const auto &Points = Clustering_.getPoints();
|
||||
OS << "<table class=\"sched-class-clusters\">";
|
||||
OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
|
||||
|
@ -254,7 +287,7 @@ void Analysis::printSchedClassClustersHtml(
|
|||
OS << "</tr>";
|
||||
for (const SchedClassCluster &Cluster : Clusters) {
|
||||
OS << "<tr class=\""
|
||||
<< (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
|
||||
<< (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
|
||||
? "good-cluster"
|
||||
: "bad-cluster")
|
||||
<< "\"><td>";
|
||||
|
@ -369,12 +402,17 @@ getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
|
|||
return Result;
|
||||
}
|
||||
|
||||
Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
|
||||
const llvm::MCSubtargetInfo &STI)
|
||||
: SCDesc(&SD),
|
||||
NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
|
||||
Analysis::ResolvedSchedClass::ResolvedSchedClass(
|
||||
const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
|
||||
bool WasVariant)
|
||||
: SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
|
||||
WasVariant(WasVariant),
|
||||
NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
|
||||
IdealizedProcResPressure(computeIdealizedProcResPressure(
|
||||
STI.getSchedModel(), NonRedundantWriteProcRes)) {}
|
||||
STI.getSchedModel(), NonRedundantWriteProcRes)) {
|
||||
assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
|
||||
"ResolvedSchedClass should never be variant");
|
||||
}
|
||||
|
||||
void Analysis::SchedClassCluster::addPoint(
|
||||
size_t PointId, const InstructionBenchmarkClustering &Clustering) {
|
||||
|
@ -407,7 +445,7 @@ static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
|
|||
}
|
||||
|
||||
bool Analysis::SchedClassCluster::measurementsMatch(
|
||||
const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
|
||||
const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
|
||||
const InstructionBenchmarkClustering &Clustering) const {
|
||||
const size_t NumMeasurements = Representative.size();
|
||||
std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
|
||||
|
@ -424,9 +462,9 @@ bool Analysis::SchedClassCluster::measurementsMatch(
|
|||
}
|
||||
// Find the latency.
|
||||
SchedClassPoint[0].PerInstructionValue = 0.0;
|
||||
for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
|
||||
for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
|
||||
const llvm::MCWriteLatencyEntry *const WLE =
|
||||
STI.getWriteLatencyEntry(SC.SCDesc, I);
|
||||
STI.getWriteLatencyEntry(RSC.SCDesc, I);
|
||||
SchedClassPoint[0].PerInstructionValue =
|
||||
std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
|
||||
}
|
||||
|
@ -438,17 +476,17 @@ bool Analysis::SchedClassCluster::measurementsMatch(
|
|||
if (ProcResIdx > 0) {
|
||||
// Find the pressure on ProcResIdx `Key`.
|
||||
const auto ProcResPressureIt =
|
||||
std::find_if(SC.IdealizedProcResPressure.begin(),
|
||||
SC.IdealizedProcResPressure.end(),
|
||||
std::find_if(RSC.IdealizedProcResPressure.begin(),
|
||||
RSC.IdealizedProcResPressure.end(),
|
||||
[ProcResIdx](const std::pair<uint16_t, float> &WPR) {
|
||||
return WPR.first == ProcResIdx;
|
||||
});
|
||||
SchedClassPoint[I].PerInstructionValue =
|
||||
ProcResPressureIt == SC.IdealizedProcResPressure.end()
|
||||
ProcResPressureIt == RSC.IdealizedProcResPressure.end()
|
||||
? 0.0
|
||||
: ProcResPressureIt->second;
|
||||
} else if (Key == "NumMicroOps") {
|
||||
SchedClassPoint[I].PerInstructionValue = SC.SCDesc->NumMicroOps;
|
||||
SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
|
||||
} else {
|
||||
llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
|
||||
"name, got "
|
||||
|
@ -465,26 +503,25 @@ bool Analysis::SchedClassCluster::measurementsMatch(
|
|||
return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
|
||||
}
|
||||
|
||||
void Analysis::printSchedClassDescHtml(const SchedClass &SC,
|
||||
void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
|
||||
llvm::raw_ostream &OS) const {
|
||||
OS << "<table class=\"sched-class-desc\">";
|
||||
OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
|
||||
"th><th>WriteProcRes</th><th title=\"This is the idealized unit "
|
||||
"resource (port) pressure assuming ideal distribution\">Idealized "
|
||||
"Resource Pressure</th></tr>";
|
||||
if (SC.SCDesc->isValid()) {
|
||||
if (RSC.SCDesc->isValid()) {
|
||||
const auto &SM = SubtargetInfo_->getSchedModel();
|
||||
OS << "<tr><td>✔</td>";
|
||||
OS << "<td>" << (SC.SCDesc->isVariant() ? "✔" : "✕")
|
||||
<< "</td>";
|
||||
OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
|
||||
OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>";
|
||||
OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
|
||||
// Latencies.
|
||||
OS << "<td><ul>";
|
||||
for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
|
||||
for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
|
||||
const auto *const Entry =
|
||||
SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
|
||||
SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
|
||||
OS << "<li>" << Entry->Cycles;
|
||||
if (SC.SCDesc->NumWriteLatencyEntries > 1) {
|
||||
if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
|
||||
// Dismabiguate if more than 1 latency.
|
||||
OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
|
||||
}
|
||||
|
@ -493,7 +530,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
|
|||
OS << "</ul></td>";
|
||||
// WriteProcRes.
|
||||
OS << "<td><ul>";
|
||||
for (const auto &WPR : SC.NonRedundantWriteProcRes) {
|
||||
for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
|
||||
OS << "<li><span class=\"mono\">";
|
||||
writeEscaped<kEscapeHtml>(OS,
|
||||
SM.getProcResource(WPR.ProcResourceIdx)->Name);
|
||||
|
@ -502,7 +539,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
|
|||
OS << "</ul></td>";
|
||||
// Idealized port pressure.
|
||||
OS << "<td><ul>";
|
||||
for (const auto &Pressure : SC.IdealizedProcResPressure) {
|
||||
for (const auto &Pressure : RSC.IdealizedProcResPressure) {
|
||||
OS << "<li><span class=\"mono\">";
|
||||
writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
|
||||
.getProcResource(Pressure.first)
|
||||
|
@ -598,19 +635,12 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
|
|||
writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
|
||||
OS << "</span></h3>";
|
||||
|
||||
for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
|
||||
const auto SchedClassId = SchedClassAndPoints.first;
|
||||
const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
|
||||
const auto &SchedModel = SubtargetInfo_->getSchedModel();
|
||||
const llvm::MCSchedClassDesc *const SCDesc =
|
||||
SchedModel.getSchedClassDesc(SchedClassId);
|
||||
if (!SCDesc)
|
||||
for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
|
||||
if (!RSCAndPoints.RSC.SCDesc)
|
||||
continue;
|
||||
const SchedClass SC(*SCDesc, *SubtargetInfo_);
|
||||
|
||||
// Bucket sched class points into sched class clusters.
|
||||
std::vector<SchedClassCluster> SchedClassClusters;
|
||||
for (const size_t PointId : SchedClassPoints) {
|
||||
for (const size_t PointId : RSCAndPoints.PointIds) {
|
||||
const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
|
||||
if (!ClusterId.isValid())
|
||||
continue; // Ignore noise and errors. FIXME: take noise into account ?
|
||||
|
@ -629,24 +659,24 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
|
|||
// Print any scheduling class that has at least one cluster that does not
|
||||
// match the checked-in data.
|
||||
if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
|
||||
[this, &SC](const SchedClassCluster &C) {
|
||||
return C.measurementsMatch(*SubtargetInfo_, SC,
|
||||
Clustering_);
|
||||
[this, &RSCAndPoints](const SchedClassCluster &C) {
|
||||
return C.measurementsMatch(*SubtargetInfo_,
|
||||
RSCAndPoints.RSC, Clustering_);
|
||||
}))
|
||||
continue; // Nothing weird.
|
||||
|
||||
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
|
||||
"class=\"sched-class-name\">";
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
|
||||
writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
|
||||
#else
|
||||
OS << SchedClassId;
|
||||
#endif
|
||||
OS << "</span> contains instructions whose performance characteristics do"
|
||||
" not match that of LLVM:</p>";
|
||||
printSchedClassClustersHtml(SchedClassClusters, SC, OS);
|
||||
printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
|
||||
OS << "<p>llvm SchedModel data:</p>";
|
||||
printSchedClassDescHtml(SC, OS);
|
||||
printSchedClassDescHtml(RSCAndPoints.RSC, OS);
|
||||
OS << "</div>";
|
||||
}
|
||||
|
||||
|
|
|
@ -49,11 +49,12 @@ private:
|
|||
using ClusterId = InstructionBenchmarkClustering::ClusterId;
|
||||
|
||||
// An llvm::MCSchedClassDesc augmented with some additional data.
|
||||
struct SchedClass {
|
||||
SchedClass(const llvm::MCSchedClassDesc &SD,
|
||||
const llvm::MCSubtargetInfo &STI);
|
||||
struct ResolvedSchedClass {
|
||||
ResolvedSchedClass(const llvm::MCSubtargetInfo &STI,
|
||||
unsigned ResolvedSchedClassId, bool WasVariant);
|
||||
|
||||
const llvm::MCSchedClassDesc *const SCDesc;
|
||||
const bool WasVariant; // Whether the original class was variant.
|
||||
const llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
|
||||
NonRedundantWriteProcRes;
|
||||
const std::vector<std::pair<uint16_t, float>> IdealizedProcResPressure;
|
||||
|
@ -75,7 +76,8 @@ private:
|
|||
|
||||
// Returns true if the cluster representative measurements match that of SC.
|
||||
bool
|
||||
measurementsMatch(const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
|
||||
measurementsMatch(const llvm::MCSubtargetInfo &STI,
|
||||
const ResolvedSchedClass &SC,
|
||||
const InstructionBenchmarkClustering &Clustering) const;
|
||||
|
||||
void addPoint(size_t PointId,
|
||||
|
@ -92,15 +94,22 @@ private:
|
|||
|
||||
void
|
||||
printSchedClassClustersHtml(const std::vector<SchedClassCluster> &Clusters,
|
||||
const SchedClass &SC,
|
||||
const ResolvedSchedClass &SC,
|
||||
llvm::raw_ostream &OS) const;
|
||||
void printSchedClassDescHtml(const SchedClass &SC,
|
||||
void printSchedClassDescHtml(const ResolvedSchedClass &SC,
|
||||
llvm::raw_ostream &OS) const;
|
||||
|
||||
// Builds a map of Sched Class -> indices of points that belong to the sched
|
||||
// class.
|
||||
std::unordered_map<unsigned, std::vector<size_t>>
|
||||
makePointsPerSchedClass() const;
|
||||
// A pair of (Sched Class, indices of points that belong to the sched
|
||||
// class).
|
||||
struct ResolvedSchedClassAndPoints {
|
||||
explicit ResolvedSchedClassAndPoints(ResolvedSchedClass &&RSC);
|
||||
|
||||
ResolvedSchedClass RSC;
|
||||
std::vector<size_t> PointIds;
|
||||
};
|
||||
|
||||
// Builds a list of ResolvedSchedClassAndPoints.
|
||||
std::vector<ResolvedSchedClassAndPoints> makePointsPerSchedClass() const;
|
||||
|
||||
template <typename EscapeTag, EscapeTag Tag>
|
||||
void writeSnippet(llvm::raw_ostream &OS, llvm::ArrayRef<uint8_t> Bytes,
|
||||
|
|
Loading…
Reference in New Issue