[llvm-exegesis] Output the unscaled value as well as the scaled one.

Summary: See PR38936 for context.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D52500

llvm-svn: 343081
Clement Courbet 2018-09-26 08:37:21 +00:00
parent cc525e7b8d
commit 684a5f6753
13 changed files with 70 additions and 49 deletions
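For orientation before the per-file diffs: BenchmarkMeasure previously stored a single Value; it now carries the measured quantity in two forms. A minimal sketch of the new shape, mirroring the BenchmarkResult.h hunk below (the numeric values are made up):

  #include <string>

  // Mirrors the new exegesis::BenchmarkMeasure from BenchmarkResult.h.
  struct BenchmarkMeasure {
    std::string Key;            // e.g. "latency" or a proc resource index
    double PerInstructionValue; // measured quantity scaled per instruction
    double PerSnippetValue;     // measured quantity for one snippet repetition
    std::string DebugString;
  };

  // Hypothetical: a 2-instruction snippet at 3.0 cycles per instruction
  // accounts for 6.0 cycles per repetition of the whole snippet.
  const BenchmarkMeasure Example = {"latency", 3.0, 6.0, ""};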

tools/llvm-exegesis/lib/Analysis.cpp

@@ -139,7 +139,7 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
 #endif
   for (const auto &Measurement : Point.Measurements) {
     OS << kCsvSep;
-    writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
+    writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
   }
   OS << "\n";
 }
@@ -410,14 +410,14 @@ bool Analysis::SchedClassCluster::measurementsMatch(
       return false;
     }
     // Find the latency.
-    SchedClassPoint[0].Value = 0.0;
+    SchedClassPoint[0].PerInstructionValue = 0.0;
     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
       const llvm::MCWriteLatencyEntry *const WLE =
           STI.getWriteLatencyEntry(SC.SCDesc, I);
-      SchedClassPoint[0].Value =
-          std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
+      SchedClassPoint[0].PerInstructionValue =
+          std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
     }
-    ClusterCenterPoint[0].Value = Representative[0].avg();
+    ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
   } else if (Mode == InstructionBenchmark::Uops) {
     for (int I = 0, E = Representative.size(); I < E; ++I) {
       // Find the pressure on ProcResIdx `Key`.
@@ -433,11 +433,11 @@ bool Analysis::SchedClassCluster::measurementsMatch(
           [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
             return WPR.first == ProcResIdx;
           });
-      SchedClassPoint[I].Value =
+      SchedClassPoint[I].PerInstructionValue =
           ProcResPressureIt == SC.IdealizedProcResPressure.end()
               ? 0.0
               : ProcResPressureIt->second;
-      ClusterCenterPoint[I].Value = Representative[I].avg();
+      ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
     }
   } else {
     llvm::errs() << "unimplemented measurement matching for mode " << Mode

tools/llvm-exegesis/lib/Analysis.h

@@ -69,7 +69,7 @@ private:
     const std::vector<size_t> &getPointIds() const { return PointIds; }
     // Return the cluster centroid.
-    const std::vector<BenchmarkMeasureStats> &getRepresentative() const {
+    const std::vector<PerInstructionStats> &getRepresentative() const {
       return Representative;
     }
@@ -85,7 +85,7 @@ private:
     InstructionBenchmarkClustering::ClusterId ClusterId;
     std::vector<size_t> PointIds;
     // Measurement stats for the points in the SchedClassCluster.
-    std::vector<BenchmarkMeasureStats> Representative;
+    std::vector<PerInstructionStats> Representative;
   };
   void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const;

tools/llvm-exegesis/lib/BenchmarkResult.cpp

@@ -164,9 +164,10 @@ template <> struct SequenceElementTraits<exegesis::BenchmarkMeasure> {
 // e.g. { "key": "the key", "value": 0123 }
 template <> struct MappingTraits<exegesis::BenchmarkMeasure> {
   static void mapping(IO &Io, exegesis::BenchmarkMeasure &Obj) {
-    Io.mapRequired("key", Obj.Key);
-    Io.mapRequired("value", Obj.Value);
     Io.mapOptional("debug_string", Obj.DebugString);
+    Io.mapRequired("value", Obj.PerInstructionValue);
+    Io.mapOptional("per_snippet_value", Obj.PerSnippetValue);
+    Io.mapRequired("key", Obj.Key);
   }
   static const bool flow = true;
 };
@@ -345,14 +346,14 @@ llvm::Error InstructionBenchmark::writeYaml(const LLVMState &State,
   return llvm::Error::success();
 }
-void BenchmarkMeasureStats::push(const BenchmarkMeasure &BM) {
+void PerInstructionStats::push(const BenchmarkMeasure &BM) {
   if (Key.empty())
     Key = BM.Key;
   assert(Key == BM.Key);
   ++NumValues;
-  SumValues += BM.Value;
-  MaxValue = std::max(MaxValue, BM.Value);
-  MinValue = std::min(MinValue, BM.Value);
+  SumValues += BM.PerInstructionValue;
+  MaxValue = std::max(MaxValue, BM.PerInstructionValue);
+  MinValue = std::min(MinValue, BM.PerInstructionValue);
 }
 } // namespace exegesis
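The renamed PerInstructionStats accumulator aggregates only the per-instruction values, as push() above shows. A usage sketch consistent with the unit test at the end of this commit (assuming the declarations from BenchmarkResult.h):

  PerInstructionStats Stats;
  Stats.push(BenchmarkMeasure{"latency", 0.5, 0.0, ""});
  Stats.push(BenchmarkMeasure{"latency", 1.5, 0.0, ""});
  // Stats.min() == 0.5, Stats.max() == 1.5, Stats.avg() == 1.0;
  // the PerSnippetValue (here 0.0) never enters the stats.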

tools/llvm-exegesis/lib/BenchmarkResult.h

@@ -42,7 +42,12 @@ struct InstructionBenchmarkKey {
 struct BenchmarkMeasure {
   std::string Key;
-  double Value;
+  // This is the per-instruction value, i.e. measured quantity scaled per
+  // instruction.
+  double PerInstructionValue;
+  // This is the per-snippet value, i.e. measured quantity for one repetition
+  // of the whole snippet.
+  double PerSnippetValue;
   std::string DebugString;
 };
@@ -81,7 +86,7 @@ struct InstructionBenchmark {
 // Utilities to work with Benchmark measures.
 // A class that measures stats over benchmark measures.
-class BenchmarkMeasureStats {
+class PerInstructionStats {
 public:
   void push(const BenchmarkMeasure &BM);
tools/llvm-exegesis/lib/BenchmarkRunner.cpp

@@ -88,7 +88,15 @@ BenchmarkRunner::runConfiguration(const BenchmarkCode &BC,
                << *ObjectFilePath << "\n";
   const ExecutableFunction EF(State.createTargetMachine(),
                               getObjectFromFile(*ObjectFilePath));
-  InstrBenchmark.Measurements = runMeasurements(EF, *Scratch, NumRepetitions);
+  InstrBenchmark.Measurements = runMeasurements(EF, *Scratch);
+  assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions");
+  for (BenchmarkMeasure &BM : InstrBenchmark.Measurements) {
+    // Scale the measurements by instruction.
+    BM.PerInstructionValue /= InstrBenchmark.NumRepetitions;
+    // Scale the measurements by snippet.
+    BM.PerSnippetValue *= static_cast<double>(BC.Instructions.size()) /
+                          InstrBenchmark.NumRepetitions;
+  }
   return InstrBenchmark;
 }
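The scaling above, worked through on hypothetical numbers (assuming, as the per-instruction division implies, that NumRepetitions counts executed instructions):

  // Runners now return the raw counter value in both fields of each measure.
  const double Raw = 3000.0;             // e.g. measured cycles
  const unsigned NumRepetitions = 1000;  // instructions executed
  const size_t SnippetSize = 2;          // BC.Instructions.size()

  const double PerInstruction = Raw / NumRepetitions;           // 3.0
  const double PerSnippet = Raw * SnippetSize / NumRepetitions; // 6.0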

tools/llvm-exegesis/lib/BenchmarkRunner.h

@@ -69,8 +69,8 @@ protected:
 private:
   virtual std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const = 0;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const = 0;
   llvm::Expected<std::string>
   writeObjectFile(const BenchmarkCode &Configuration,

tools/llvm-exegesis/lib/Clustering.cpp

@@ -53,7 +53,7 @@ bool InstructionBenchmarkClustering::isNeighbour(
     const std::vector<BenchmarkMeasure> &Q) const {
   double DistanceSquared = 0.0;
   for (size_t I = 0, E = P.size(); I < E; ++I) {
-    const auto Diff = P[I].Value - Q[I].Value;
+    const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
     DistanceSquared += Diff * Diff;
   }
   return DistanceSquared <= EpsilonSquared_;
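Neighbour tests therefore stay in per-instruction space; a worked instance of the check above (Epsilon is the clustering parameter, 0.25 in the tests further down):

  // One-dimensional example: per-instruction values 1.00 and 1.20.
  const double Diff = 1.00 - 1.20;
  const bool Neighbours = (Diff * Diff) <= (0.25 * 0.25); // 0.04 <= 0.0625: true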

tools/llvm-exegesis/lib/Latency.cpp

@@ -109,8 +109,7 @@ LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
 std::vector<BenchmarkMeasure>
 LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
-                                        ScratchSpace &Scratch,
-                                        const unsigned NumRepetitions) const {
+                                        ScratchSpace &Scratch) const {
   // Cycle measurements include some overhead from the kernel. Repeat the
   // measure several times and take the minimum value.
   constexpr const int NumMeasurements = 30;
@@ -131,7 +130,8 @@ LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
     if (Value < MinLatency)
       MinLatency = Value;
   }
-  return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
+  return {{"latency", static_cast<double>(MinLatency),
+           static_cast<double>(MinLatency), ""}};
 }
 } // namespace exegesis
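The latency runner thus reports the raw minimum over the 30 measurements in both fields, and the division by NumRepetitions now happens once in BenchmarkRunner::runConfiguration. The net effect, on hypothetical numbers:

  // Raw minimum cycle counter over the 30 runs:
  const int64_t MinLatency = 3000;
  // Returned before scaling: BenchmarkMeasure{"latency", 3000.0, 3000.0, ""}.
  // After runConfiguration with NumRepetitions = 1000 and a one-instruction
  // snippet: PerInstructionValue == 3.0 and PerSnippetValue == 3.0.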

tools/llvm-exegesis/lib/Latency.h

@@ -44,8 +44,8 @@ public:
 private:
   std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const override;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const override;
   virtual const char *getCounterName() const;
 };

tools/llvm-exegesis/lib/Uops.cpp

@@ -252,8 +252,7 @@ UopsSnippetGenerator::generateCodeTemplate(unsigned Opcode) const {
 std::vector<BenchmarkMeasure>
 UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
-                                     ScratchSpace &Scratch,
-                                     const unsigned NumRepetitions) const {
+                                     ScratchSpace &Scratch) const {
   const auto &SchedModel = State.getSubtargetInfo().getSchedModel();
   std::vector<BenchmarkMeasure> Result;
@@ -281,7 +280,8 @@ UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
       CounterValue += Counter.read();
     }
     Result.push_back({llvm::itostr(ProcResIdx),
-                      static_cast<double>(CounterValue) / NumRepetitions,
+                      static_cast<double>(CounterValue),
+                      static_cast<double>(CounterValue),
                       SchedModel.getProcResource(ProcResIdx)->Name});
   }
   return Result;

tools/llvm-exegesis/lib/Uops.h

@@ -71,8 +71,8 @@ public:
 private:
   std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const override;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const override;
 };
 } // namespace exegesis

unittests/tools/llvm-exegesis/ClusteringTest.cpp

@@ -26,13 +26,18 @@ TEST(ClusteringTest, Clusters3D) {
   std::vector<InstructionBenchmark> Points(6);
   // Cluster around (x=0, y=1, z=2): points {0, 3}.
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, "A"}};
-  Points[3].Measurements = {{"x", -0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[0].Measurements = {
+      {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, "A"}};
+  Points[3].Measurements = {
+      {"x", -0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   // Cluster around (x=1, y=1, z=2): points {1, 4}.
-  Points[1].Measurements = {{"x", 1.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
-  Points[4].Measurements = {{"x", 0.99, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[1].Measurements = {
+      {"x", 1.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
+  Points[4].Measurements = {
+      {"x", 0.99, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   // Cluster around (x=0, y=0, z=0): points {5}, marked as noise.
-  Points[5].Measurements = {{"x", 0.0, ""}, {"y", 0.01, ""}, {"z", -0.02, ""}};
+  Points[5].Measurements = {
+      {"x", 0.0, 0.0, ""}, {"y", 0.01, 0.0, ""}, {"z", -0.02, 0.0, ""}};
   // Error cluster: points {2}
   Points[2].Error = "oops";
@@ -64,8 +69,9 @@ TEST(ClusteringTest, Clusters3D) {
 TEST(ClusteringTest, Clusters3D_InvalidSize) {
   std::vector<InstructionBenchmark> Points(6);
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
-  Points[1].Measurements = {{"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[0].Measurements = {
+      {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   auto Error =
       InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
   ASSERT_TRUE((bool)Error);
@@ -74,8 +80,8 @@ TEST(ClusteringTest, Clusters3D_InvalidSize) {
 TEST(ClusteringTest, Clusters3D_InvalidOrder) {
   std::vector<InstructionBenchmark> Points(6);
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}};
-  Points[1].Measurements = {{"y", 1.02, ""}, {"x", 1.98, ""}};
+  Points[0].Measurements = {{"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"x", 1.98, 0.0, ""}};
   auto Error =
       InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
   ASSERT_TRUE((bool)Error);

unittests/tools/llvm-exegesis/BenchmarkResultTest.cpp

@@ -28,7 +28,8 @@ using ::testing::Property;
 namespace exegesis {
 bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B) {
-  return std::tie(A.Key, A.Value) == std::tie(B.Key, B.Value);
+  return std::tie(A.Key, A.PerInstructionValue, A.PerSnippetValue) ==
+         std::tie(B.Key, B.PerInstructionValue, B.PerSnippetValue);
 }
 static std::string Dump(const llvm::MCInst &McInst) {
@@ -75,8 +76,8 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
   ToDisk.CpuName = "cpu_name";
   ToDisk.LLVMTriple = "llvm_triple";
   ToDisk.NumRepetitions = 1;
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, "debug a"});
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, ""});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1, "debug a"});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2, ""});
   ToDisk.Error = "error";
   ToDisk.Info = "info";
@@ -123,12 +124,12 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
   }
 }
-TEST(BenchmarkResultTest, BenchmarkMeasureStats) {
-  BenchmarkMeasureStats Stats;
-  Stats.push(BenchmarkMeasure{"a", 0.5, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", 1.5, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", -1.0, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", 0.0, "debug a"});
+TEST(BenchmarkResultTest, PerInstructionStats) {
+  PerInstructionStats Stats;
+  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0, "debug a"});
   EXPECT_EQ(Stats.min(), -1.0);
   EXPECT_EQ(Stats.max(), 1.5);
   EXPECT_EQ(Stats.avg(), 0.25); // (0.5+1.5-1.0+0.0) / 4