[llvm-exegesis] Resolve variant classes in analysis.

Summary: See PR38884.

Reviewers: gchatelet

Subscribers: tschuett, RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D52825

llvm-svn: 343680
This commit is contained in:
Clement Courbet 2018-10-03 11:50:25 +00:00
parent d934032e48
commit d5a39553ff
3 changed files with 127 additions and 62 deletions

View File

@ -0,0 +1,26 @@
# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file="" -analysis-numpoints=1 | FileCheck %s
# CHECK: cluster_id,opcode_name,config,sched_class,SBPort0,SBPort1,SBPort23,SBPort4,SBPort5,NumMicroOps
# CHECK-NEXT: SBWriteZeroLatency
---
mode: uops
key:
instructions:
- 'XOR32rr EAX EAX EAX'
config: ''
register_initial_values:
cpu_name: sandybridge
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: SBPort0, value: 0.0012, per_snippet_value: 0.0012 }
- { key: SBPort1, value: 0.0021, per_snippet_value: 0.0021 }
- { key: SBPort23, value: 0.0013, per_snippet_value: 0.0013 }
- { key: SBPort4, value: 0.0018, per_snippet_value: 0.0018 }
- { key: SBPort5, value: 0.0012, per_snippet_value: 0.0012 }
- { key: NumMicroOps, value: 1.0108, per_snippet_value: 1.0108 }
error: ''
info: ''
assembled_snippet: 31C031C031C031C031C031C031C031C031C031C031C031C031C031C031C031C0C3
...

View File

@ -19,6 +19,16 @@ namespace exegesis {
static const char kCsvSep = ',';
static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
unsigned SchedClassId,
const llvm::MCInst &MCI) {
const auto &SM = STI.getSchedModel();
while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
SchedClassId =
STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
return SchedClassId;
}
namespace {
enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
@ -126,11 +136,12 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
OS << kCsvSep;
assert(!Point.Key.Instructions.empty());
// FIXME: Resolve variant classes.
const unsigned SchedClassId =
InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
const auto &SchedModel = SubtargetInfo_->getSchedModel();
const llvm::MCInst &MCI = Point.Key.Instructions[0];
const unsigned SchedClassId = resolveSchedClassId(
*SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
const llvm::MCSchedClassDesc *const SCDesc =
SchedModel.getSchedClassDesc(SchedClassId);
writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
@ -193,21 +204,43 @@ Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
return llvm::Error::success();
}
std::unordered_map<unsigned, std::vector<size_t>>
Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
ResolvedSchedClass &&RSC)
: RSC(std::move(RSC)) {}
std::vector<Analysis::ResolvedSchedClassAndPoints>
Analysis::makePointsPerSchedClass() const {
std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
std::vector<ResolvedSchedClassAndPoints> Entries;
// Maps SchedClassIds to index in result.
std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
const auto &Points = Clustering_.getPoints();
for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
const InstructionBenchmark &Point = Points[PointId];
if (!Point.Error.empty())
continue;
assert(!Point.Key.Instructions.empty());
const auto Opcode = Point.Key.Instructions[0].getOpcode();
// FIXME: Resolve variant classes.
PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
PointId);
// FIXME: we should be using the tuple of classes for instructions in the
// snippet as key.
const llvm::MCInst &MCI = Point.Key.Instructions[0];
unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
.getSchedClassDesc(SchedClassId)
->isVariant();
SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
if (IndexIt == SchedClassIdToIndex.end()) {
// Create a new entry.
SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
ResolvedSchedClassAndPoints Entry(
ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
Entry.PointIds.push_back(PointId);
Entries.push_back(std::move(Entry));
} else {
// Append to the existing entry.
Entries[IndexIt->second].PointIds.push_back(PointId);
}
}
return PointsPerSchedClass;
return Entries;
}
// Uops repeat the same opcode over again. Just show this opcode and show the
@ -239,8 +272,8 @@ writeLatencySnippetHtml(llvm::raw_ostream &OS,
}
void Analysis::printSchedClassClustersHtml(
const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
llvm::raw_ostream &OS) const {
const std::vector<SchedClassCluster> &Clusters,
const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
const auto &Points = Clustering_.getPoints();
OS << "<table class=\"sched-class-clusters\">";
OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
@ -254,7 +287,7 @@ void Analysis::printSchedClassClustersHtml(
OS << "</tr>";
for (const SchedClassCluster &Cluster : Clusters) {
OS << "<tr class=\""
<< (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
<< (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
? "good-cluster"
: "bad-cluster")
<< "\"><td>";
@ -369,12 +402,17 @@ getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
return Result;
}
Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
const llvm::MCSubtargetInfo &STI)
: SCDesc(&SD),
NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
Analysis::ResolvedSchedClass::ResolvedSchedClass(
const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
bool WasVariant)
: SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
WasVariant(WasVariant),
NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
IdealizedProcResPressure(computeIdealizedProcResPressure(
STI.getSchedModel(), NonRedundantWriteProcRes)) {}
STI.getSchedModel(), NonRedundantWriteProcRes)) {
assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
"ResolvedSchedClass should never be variant");
}
void Analysis::SchedClassCluster::addPoint(
size_t PointId, const InstructionBenchmarkClustering &Clustering) {
@ -407,7 +445,7 @@ static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
}
bool Analysis::SchedClassCluster::measurementsMatch(
const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
const InstructionBenchmarkClustering &Clustering) const {
const size_t NumMeasurements = Representative.size();
std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
@ -424,9 +462,9 @@ bool Analysis::SchedClassCluster::measurementsMatch(
}
// Find the latency.
SchedClassPoint[0].PerInstructionValue = 0.0;
for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
const llvm::MCWriteLatencyEntry *const WLE =
STI.getWriteLatencyEntry(SC.SCDesc, I);
STI.getWriteLatencyEntry(RSC.SCDesc, I);
SchedClassPoint[0].PerInstructionValue =
std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
}
@ -438,17 +476,17 @@ bool Analysis::SchedClassCluster::measurementsMatch(
if (ProcResIdx > 0) {
// Find the pressure on ProcResIdx `Key`.
const auto ProcResPressureIt =
std::find_if(SC.IdealizedProcResPressure.begin(),
SC.IdealizedProcResPressure.end(),
std::find_if(RSC.IdealizedProcResPressure.begin(),
RSC.IdealizedProcResPressure.end(),
[ProcResIdx](const std::pair<uint16_t, float> &WPR) {
return WPR.first == ProcResIdx;
});
SchedClassPoint[I].PerInstructionValue =
ProcResPressureIt == SC.IdealizedProcResPressure.end()
ProcResPressureIt == RSC.IdealizedProcResPressure.end()
? 0.0
: ProcResPressureIt->second;
} else if (Key == "NumMicroOps") {
SchedClassPoint[I].PerInstructionValue = SC.SCDesc->NumMicroOps;
SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
} else {
llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
"name, got "
@ -465,26 +503,25 @@ bool Analysis::SchedClassCluster::measurementsMatch(
return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}
void Analysis::printSchedClassDescHtml(const SchedClass &SC,
void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
llvm::raw_ostream &OS) const {
OS << "<table class=\"sched-class-desc\">";
OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
"th><th>WriteProcRes</th><th title=\"This is the idealized unit "
"resource (port) pressure assuming ideal distribution\">Idealized "
"Resource Pressure</th></tr>";
if (SC.SCDesc->isValid()) {
if (RSC.SCDesc->isValid()) {
const auto &SM = SubtargetInfo_->getSchedModel();
OS << "<tr><td>&#10004;</td>";
OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
<< "</td>";
OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
// Latencies.
OS << "<td><ul>";
for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
const auto *const Entry =
SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
OS << "<li>" << Entry->Cycles;
if (SC.SCDesc->NumWriteLatencyEntries > 1) {
if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
// Dismabiguate if more than 1 latency.
OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
}
@ -493,7 +530,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
OS << "</ul></td>";
// WriteProcRes.
OS << "<td><ul>";
for (const auto &WPR : SC.NonRedundantWriteProcRes) {
for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS,
SM.getProcResource(WPR.ProcResourceIdx)->Name);
@ -502,7 +539,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
OS << "</ul></td>";
// Idealized port pressure.
OS << "<td><ul>";
for (const auto &Pressure : SC.IdealizedProcResPressure) {
for (const auto &Pressure : RSC.IdealizedProcResPressure) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
.getProcResource(Pressure.first)
@ -598,19 +635,12 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
OS << "</span></h3>";
for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
const auto SchedClassId = SchedClassAndPoints.first;
const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
const auto &SchedModel = SubtargetInfo_->getSchedModel();
const llvm::MCSchedClassDesc *const SCDesc =
SchedModel.getSchedClassDesc(SchedClassId);
if (!SCDesc)
for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
if (!RSCAndPoints.RSC.SCDesc)
continue;
const SchedClass SC(*SCDesc, *SubtargetInfo_);
// Bucket sched class points into sched class clusters.
std::vector<SchedClassCluster> SchedClassClusters;
for (const size_t PointId : SchedClassPoints) {
for (const size_t PointId : RSCAndPoints.PointIds) {
const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
if (!ClusterId.isValid())
continue; // Ignore noise and errors. FIXME: take noise into account ?
@ -629,24 +659,24 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
// Print any scheduling class that has at least one cluster that does not
// match the checked-in data.
if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
[this, &SC](const SchedClassCluster &C) {
return C.measurementsMatch(*SubtargetInfo_, SC,
Clustering_);
[this, &RSCAndPoints](const SchedClassCluster &C) {
return C.measurementsMatch(*SubtargetInfo_,
RSCAndPoints.RSC, Clustering_);
}))
continue; // Nothing weird.
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
#else
OS << SchedClassId;
#endif
OS << "</span> contains instructions whose performance characteristics do"
" not match that of LLVM:</p>";
printSchedClassClustersHtml(SchedClassClusters, SC, OS);
printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
OS << "<p>llvm SchedModel data:</p>";
printSchedClassDescHtml(SC, OS);
printSchedClassDescHtml(RSCAndPoints.RSC, OS);
OS << "</div>";
}

View File

@ -49,11 +49,12 @@ private:
using ClusterId = InstructionBenchmarkClustering::ClusterId;
// An llvm::MCSchedClassDesc augmented with some additional data.
struct SchedClass {
SchedClass(const llvm::MCSchedClassDesc &SD,
const llvm::MCSubtargetInfo &STI);
struct ResolvedSchedClass {
ResolvedSchedClass(const llvm::MCSubtargetInfo &STI,
unsigned ResolvedSchedClassId, bool WasVariant);
const llvm::MCSchedClassDesc *const SCDesc;
const bool WasVariant; // Whether the original class was variant.
const llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
NonRedundantWriteProcRes;
const std::vector<std::pair<uint16_t, float>> IdealizedProcResPressure;
@ -75,7 +76,8 @@ private:
// Returns true if the cluster representative measurements match that of SC.
bool
measurementsMatch(const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
measurementsMatch(const llvm::MCSubtargetInfo &STI,
const ResolvedSchedClass &SC,
const InstructionBenchmarkClustering &Clustering) const;
void addPoint(size_t PointId,
@ -92,15 +94,22 @@ private:
void
printSchedClassClustersHtml(const std::vector<SchedClassCluster> &Clusters,
const SchedClass &SC,
const ResolvedSchedClass &SC,
llvm::raw_ostream &OS) const;
void printSchedClassDescHtml(const SchedClass &SC,
void printSchedClassDescHtml(const ResolvedSchedClass &SC,
llvm::raw_ostream &OS) const;
// Builds a map of Sched Class -> indices of points that belong to the sched
// class.
std::unordered_map<unsigned, std::vector<size_t>>
makePointsPerSchedClass() const;
// A pair of (Sched Class, indices of points that belong to the sched
// class).
struct ResolvedSchedClassAndPoints {
explicit ResolvedSchedClassAndPoints(ResolvedSchedClass &&RSC);
ResolvedSchedClass RSC;
std::vector<size_t> PointIds;
};
// Builds a list of ResolvedSchedClassAndPoints.
std::vector<ResolvedSchedClassAndPoints> makePointsPerSchedClass() const;
template <typename EscapeTag, EscapeTag Tag>
void writeSnippet(llvm::raw_ostream &OS, llvm::ArrayRef<uint8_t> Bytes,