[llvm-exegesis] Output the unscaled value as well as the scaled one.
Summary: See PR38936 for context.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D52500

llvm-svn: 343081
commit 684a5f6753 (parent cc525e7b8d)

@@ -139,7 +139,7 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
 #endif
   for (const auto &Measurement : Point.Measurements) {
     OS << kCsvSep;
-    writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
+    writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
   }
   OS << "\n";
 }

@@ -410,14 +410,14 @@ bool Analysis::SchedClassCluster::measurementsMatch(
       return false;
     }
     // Find the latency.
-    SchedClassPoint[0].Value = 0.0;
+    SchedClassPoint[0].PerInstructionValue = 0.0;
     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
       const llvm::MCWriteLatencyEntry *const WLE =
           STI.getWriteLatencyEntry(SC.SCDesc, I);
-      SchedClassPoint[0].Value =
-          std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
+      SchedClassPoint[0].PerInstructionValue =
+          std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
     }
-    ClusterCenterPoint[0].Value = Representative[0].avg();
+    ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
   } else if (Mode == InstructionBenchmark::Uops) {
     for (int I = 0, E = Representative.size(); I < E; ++I) {
       // Find the pressure on ProcResIdx `Key`.

@@ -433,11 +433,11 @@ bool Analysis::SchedClassCluster::measurementsMatch(
           [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
             return WPR.first == ProcResIdx;
           });
-      SchedClassPoint[I].Value =
+      SchedClassPoint[I].PerInstructionValue =
           ProcResPressureIt == SC.IdealizedProcResPressure.end()
               ? 0.0
               : ProcResPressureIt->second;
-      ClusterCenterPoint[I].Value = Representative[I].avg();
+      ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
     }
   } else {
     llvm::errs() << "unimplemented measurement matching for mode " << Mode

@@ -69,7 +69,7 @@ private:
     const std::vector<size_t> &getPointIds() const { return PointIds; }

     // Return the cluster centroid.
-    const std::vector<BenchmarkMeasureStats> &getRepresentative() const {
+    const std::vector<PerInstructionStats> &getRepresentative() const {
       return Representative;
     }

@@ -85,7 +85,7 @@ private:
     InstructionBenchmarkClustering::ClusterId ClusterId;
     std::vector<size_t> PointIds;
     // Measurement stats for the points in the SchedClassCluster.
-    std::vector<BenchmarkMeasureStats> Representative;
+    std::vector<PerInstructionStats> Representative;
   };

   void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const;

@@ -164,9 +164,10 @@ template <> struct SequenceElementTraits<exegesis::BenchmarkMeasure> {
 // e.g. { "key": "the key", "value": 0123 }
 template <> struct MappingTraits<exegesis::BenchmarkMeasure> {
   static void mapping(IO &Io, exegesis::BenchmarkMeasure &Obj) {
     Io.mapRequired("key", Obj.Key);
-    Io.mapRequired("value", Obj.Value);
-    Io.mapOptional("debug_string", Obj.DebugString);
+    Io.mapRequired("value", Obj.PerInstructionValue);
+    Io.mapOptional("per_snippet_value", Obj.PerSnippetValue);
+    Io.mapOptional("debug_string", Obj.DebugString);
   }
   static const bool flow = true;
 };

@@ -345,14 +346,14 @@ llvm::Error InstructionBenchmark::writeYaml(const LLVMState &State,
   return llvm::Error::success();
 }

-void BenchmarkMeasureStats::push(const BenchmarkMeasure &BM) {
+void PerInstructionStats::push(const BenchmarkMeasure &BM) {
   if (Key.empty())
     Key = BM.Key;
   assert(Key == BM.Key);
   ++NumValues;
-  SumValues += BM.Value;
-  MaxValue = std::max(MaxValue, BM.Value);
-  MinValue = std::min(MinValue, BM.Value);
+  SumValues += BM.PerInstructionValue;
+  MaxValue = std::max(MaxValue, BM.PerInstructionValue);
+  MinValue = std::min(MinValue, BM.PerInstructionValue);
 }

 } // namespace exegesis

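The renamed accumulator keeps the same min/max/sum bookkeeping, now fed from PerInstructionValue. Below is a minimal standalone sketch of that pattern, using the same values as the unit test at the bottom of this page; the class and member names are illustrative stand-ins, not the real llvm-exegesis header.

```cpp
#include <algorithm>
#include <cassert>
#include <initializer_list>
#include <limits>

// Illustrative accumulator mirroring the push() shown above.
class StatsSketch {
public:
  void push(double PerInstructionValue) {
    ++NumValues;
    SumValues += PerInstructionValue;
    MaxValue = std::max(MaxValue, PerInstructionValue);
    MinValue = std::min(MinValue, PerInstructionValue);
  }
  double min() const { return MinValue; }
  double max() const { return MaxValue; }
  double avg() const { return SumValues / NumValues; }

private:
  int NumValues = 0;
  double SumValues = 0.0;
  double MaxValue = std::numeric_limits<double>::lowest();
  double MinValue = std::numeric_limits<double>::max();
};

int main() {
  StatsSketch Stats;
  for (double V : {0.5, 1.5, -1.0, 0.0})
    Stats.push(V);
  assert(Stats.min() == -1.0);
  assert(Stats.max() == 1.5);
  assert(Stats.avg() == 0.25); // (0.5 + 1.5 - 1.0 + 0.0) / 4
  return 0;
}
```
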
@@ -42,7 +42,12 @@ struct InstructionBenchmarkKey {

 struct BenchmarkMeasure {
   std::string Key;
-  double Value;
+  // This is the per-instruction value, i.e. measured quantity scaled per
+  // instruction.
+  double PerInstructionValue;
+  // This is the per-snippet value, i.e. measured quantity for one repetition of
+  // the whole snippet.
+  double PerSnippetValue;
   std::string DebugString;
 };

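With the new layout, a measure is brace-initialized in field order {Key, PerInstructionValue, PerSnippetValue, DebugString}, as the updated unit tests further down do. A minimal standalone sketch, using a simplified stand-in struct; the serialized form in the comment is only an approximation of what the YAML mapping above would emit.

```cpp
#include <string>

// Simplified stand-in for BenchmarkMeasure, for illustration only.
struct MeasureSketch {
  std::string Key;
  double PerInstructionValue;
  double PerSnippetValue;
  std::string DebugString;
};

int main() {
  // E.g. 1.5 cycles per instruction over a hypothetical 3-instruction snippet.
  // With the YAML mapping above this would serialize roughly as:
  //   { key: latency, value: 1.5, per_snippet_value: 4.5, debug_string: '' }
  const MeasureSketch M{"latency", 1.5, 4.5, ""};
  return M.Key.empty() ? 1 : 0;
}
```
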
@@ -81,7 +86,7 @@ struct InstructionBenchmark {
 // Utilities to work with Benchmark measures.

 // A class that measures stats over benchmark measures.
-class BenchmarkMeasureStats {
+class PerInstructionStats {
 public:
   void push(const BenchmarkMeasure &BM);

@@ -88,7 +88,15 @@ BenchmarkRunner::runConfiguration(const BenchmarkCode &BC,
                << *ObjectFilePath << "\n";
   const ExecutableFunction EF(State.createTargetMachine(),
                               getObjectFromFile(*ObjectFilePath));
-  InstrBenchmark.Measurements = runMeasurements(EF, *Scratch, NumRepetitions);
+  InstrBenchmark.Measurements = runMeasurements(EF, *Scratch);
+  assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions");
+  for (BenchmarkMeasure &BM : InstrBenchmark.Measurements) {
+    // Scale the measurements by instruction.
+    BM.PerInstructionValue /= InstrBenchmark.NumRepetitions;
+    // Scale the measurements by snippet.
+    BM.PerSnippetValue *= static_cast<double>(BC.Instructions.size()) /
+                          InstrBenchmark.NumRepetitions;
+  }

   return InstrBenchmark;
 }

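To make the two scalings above concrete, here is a minimal standalone sketch with made-up numbers. The struct is a simplified stand-in, and it assumes, as the two formulas imply, that NumRepetitions counts the total number of instructions in the repeated function.

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>

struct MeasureSketch {
  double PerInstructionValue; // starts out holding the raw counter value
  double PerSnippetValue;     // starts out holding the raw counter value
};

int main() {
  const unsigned NumRepetitions = 10000; // total instructions in the unrolled function
  const std::size_t SnippetSize = 3;     // instructions in one snippet iteration
  MeasureSketch BM{5000.0, 5000.0};      // hypothetical raw cycle count

  // Scale by instruction: 5000 / 10000 = 0.5 cycles per instruction.
  BM.PerInstructionValue /= NumRepetitions;
  // Scale by snippet: 5000 * 3 / 10000 = 1.5 cycles per snippet iteration.
  BM.PerSnippetValue *= static_cast<double>(SnippetSize) / NumRepetitions;

  assert(BM.PerInstructionValue == 0.5);
  assert(std::fabs(BM.PerSnippetValue - 1.5) < 1e-9);
  return 0;
}
```
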
@@ -69,8 +69,8 @@ protected:

 private:
   virtual std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const = 0;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const = 0;

   llvm::Expected<std::string>
   writeObjectFile(const BenchmarkCode &Configuration,

@@ -53,7 +53,7 @@ bool InstructionBenchmarkClustering::isNeighbour(
     const std::vector<BenchmarkMeasure> &Q) const {
   double DistanceSquared = 0.0;
   for (size_t I = 0, E = P.size(); I < E; ++I) {
-    const auto Diff = P[I].Value - Q[I].Value;
+    const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
    DistanceSquared += Diff * Diff;
   }
   return DistanceSquared <= EpsilonSquared_;

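For intuition, the neighbour test above is a squared-Euclidean comparison over the per-instruction values. A minimal standalone sketch follows; the function name and epsilon are illustrative, and the point coordinates are borrowed from the clustering test further down.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// True when the squared Euclidean distance between two measurement vectors is
// within EpsilonSquared, mirroring the isNeighbour() shown above.
static bool isNeighbourSketch(const std::vector<double> &P,
                              const std::vector<double> &Q,
                              double EpsilonSquared) {
  double DistanceSquared = 0.0;
  for (std::size_t I = 0, E = P.size(); I < E; ++I) {
    const double Diff = P[I] - Q[I];
    DistanceSquared += Diff * Diff;
  }
  return DistanceSquared <= EpsilonSquared;
}

int main() {
  // Two nearby 3-D points are neighbours for an illustrative EpsilonSquared
  // of 0.25; a point from a different cluster is not.
  assert(isNeighbourSketch({0.01, 1.02, 1.98}, {-0.01, 1.02, 1.98}, 0.25));
  assert(!isNeighbourSketch({0.01, 1.02, 1.98}, {1.01, 1.02, 1.98}, 0.25));
  return 0;
}
```
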
@@ -109,8 +109,7 @@ LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;

 std::vector<BenchmarkMeasure>
 LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
-                                        ScratchSpace &Scratch,
-                                        const unsigned NumRepetitions) const {
+                                        ScratchSpace &Scratch) const {
   // Cycle measurements include some overhead from the kernel. Repeat the
   // measure several times and take the minimum value.
   constexpr const int NumMeasurements = 30;

@@ -131,7 +130,8 @@ LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
     if (Value < MinLatency)
       MinLatency = Value;
   }
-  return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
+  return {{"latency", static_cast<double>(MinLatency),
+           static_cast<double>(MinLatency), ""}};
 }

 } // namespace exegesis

@@ -44,8 +44,8 @@ public:

 private:
   std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const override;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const override;

   virtual const char *getCounterName() const;
 };

@@ -252,8 +252,7 @@ UopsSnippetGenerator::generateCodeTemplate(unsigned Opcode) const {

 std::vector<BenchmarkMeasure>
 UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
-                                     ScratchSpace &Scratch,
-                                     const unsigned NumRepetitions) const {
+                                     ScratchSpace &Scratch) const {
   const auto &SchedModel = State.getSubtargetInfo().getSchedModel();

   std::vector<BenchmarkMeasure> Result;

@@ -281,7 +280,8 @@ UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
       CounterValue += Counter.read();
     }
     Result.push_back({llvm::itostr(ProcResIdx),
-                      static_cast<double>(CounterValue) / NumRepetitions,
+                      static_cast<double>(CounterValue),
+                      static_cast<double>(CounterValue),
                       SchedModel.getProcResource(ProcResIdx)->Name});
   }
   return Result;

@@ -71,8 +71,8 @@ public:

 private:
   std::vector<BenchmarkMeasure>
-  runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch,
-                  const unsigned NumRepetitions) const override;
+  runMeasurements(const ExecutableFunction &EF,
+                  ScratchSpace &Scratch) const override;
 };

 } // namespace exegesis

@@ -26,13 +26,18 @@ TEST(ClusteringTest, Clusters3D) {
   std::vector<InstructionBenchmark> Points(6);

   // Cluster around (x=0, y=1, z=2): points {0, 3}.
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, "A"}};
-  Points[3].Measurements = {{"x", -0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[0].Measurements = {
+      {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, "A"}};
+  Points[3].Measurements = {
+      {"x", -0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   // Cluster around (x=1, y=1, z=2): points {1, 4}.
-  Points[1].Measurements = {{"x", 1.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
-  Points[4].Measurements = {{"x", 0.99, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[1].Measurements = {
+      {"x", 1.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
+  Points[4].Measurements = {
+      {"x", 0.99, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   // Cluster around (x=0, y=0, z=0): points {5}, marked as noise.
-  Points[5].Measurements = {{"x", 0.0, ""}, {"y", 0.01, ""}, {"z", -0.02, ""}};
+  Points[5].Measurements = {
+      {"x", 0.0, 0.0, ""}, {"y", 0.01, 0.0, ""}, {"z", -0.02, 0.0, ""}};
   // Error cluster: points {2}
   Points[2].Error = "oops";

@@ -64,8 +69,9 @@ TEST(ClusteringTest, Clusters3D) {

 TEST(ClusteringTest, Clusters3D_InvalidSize) {
   std::vector<InstructionBenchmark> Points(6);
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
-  Points[1].Measurements = {{"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[0].Measurements = {
+      {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}};
   auto Error =
       InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
   ASSERT_TRUE((bool)Error);

@@ -74,8 +80,8 @@ TEST(ClusteringTest, Clusters3D_InvalidSize) {

 TEST(ClusteringTest, Clusters3D_InvalidOrder) {
   std::vector<InstructionBenchmark> Points(6);
-  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}};
-  Points[1].Measurements = {{"y", 1.02, ""}, {"x", 1.98, ""}};
+  Points[0].Measurements = {{"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}};
+  Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"x", 1.98, 0.0, ""}};
   auto Error =
       InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
   ASSERT_TRUE((bool)Error);

@@ -28,7 +28,8 @@ using ::testing::Property;
 namespace exegesis {

 bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B) {
-  return std::tie(A.Key, A.Value) == std::tie(B.Key, B.Value);
+  return std::tie(A.Key, A.PerInstructionValue, A.PerSnippetValue) ==
+         std::tie(B.Key, B.PerInstructionValue, B.PerSnippetValue);
 }

 static std::string Dump(const llvm::MCInst &McInst) {

@@ -75,8 +76,8 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
   ToDisk.CpuName = "cpu_name";
   ToDisk.LLVMTriple = "llvm_triple";
   ToDisk.NumRepetitions = 1;
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, "debug a"});
-  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, ""});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1, "debug a"});
+  ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2, ""});
   ToDisk.Error = "error";
   ToDisk.Info = "info";

@@ -123,12 +124,12 @@ TEST(BenchmarkResultTest, WriteToAndReadFromDisk) {
   }
 }

-TEST(BenchmarkResultTest, BenchmarkMeasureStats) {
-  BenchmarkMeasureStats Stats;
-  Stats.push(BenchmarkMeasure{"a", 0.5, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", 1.5, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", -1.0, "debug a"});
-  Stats.push(BenchmarkMeasure{"a", 0.0, "debug a"});
+TEST(BenchmarkResultTest, PerInstructionStats) {
+  PerInstructionStats Stats;
+  Stats.push(BenchmarkMeasure{"a", 0.5, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", 1.5, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", -1.0, 0.0, "debug a"});
+  Stats.push(BenchmarkMeasure{"a", 0.0, 0.0, "debug a"});
   EXPECT_EQ(Stats.min(), -1.0);
   EXPECT_EQ(Stats.max(), 1.5);
   EXPECT_EQ(Stats.avg(), 0.25); // (0.5+1.5-1.0+0.0) / 4