Re-land r331622 "[llvm-exegesis] Add a library to cluster benchmark results."

Add missing move.

llvm-svn: 331624
This commit is contained in:
Clement Courbet 2018-05-07 09:09:48 +00:00
parent e9174bc2c8
commit 967154148d
5 changed files with 361 additions and 0 deletions

View File

@ -2,6 +2,7 @@ add_library(LLVMExegesis
STATIC
BenchmarkResult.cpp
BenchmarkRunner.cpp
Clustering.cpp
InMemoryAssembler.cpp
InstructionSnippetGenerator.cpp
Latency.cpp

View File

@ -0,0 +1,170 @@
//===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Clustering.h"
#include <string>
#include <unordered_set>
namespace exegesis {
// The clustering problem has the following characteristics:
// (A) - Low dimension (dimensions are typically proc resource units,
// typically < 10).
// (B) - Number of points : ~thousands (points are measurements of an MCInst)
// (C) - Number of clusters: ~tens.
// (D) - The number of clusters is not known /a priory/.
// (E) - The amount of noise is relatively small.
// The problem is rather small. In terms of algorithms, (D) disqualifies
// k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
//
// We've used DBSCAN here because it's simple to implement. This is a pretty
// straightforward and inefficient implementation of the pseudocode in [2].
//
// [1] https://en.wikipedia.org/wiki/DBSCAN
// [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
namespace {
// Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
// including Q).
std::vector<size_t> rangeQuery(const std::vector<InstructionBenchmark> &Points,
const size_t Q, const double EpsilonSquared) {
std::vector<size_t> Neighbors;
const auto &QMeasurements = Points[Q].Measurements;
for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) {
if (P == Q)
continue;
const auto &PMeasurements = Points[P].Measurements;
if (PMeasurements.empty()) // Error point.
continue;
double DistanceSquared = 0;
for (size_t I = 0, E = QMeasurements.size(); I < E; ++I) {
const auto Diff = PMeasurements[I].Value - QMeasurements[I].Value;
DistanceSquared += Diff * Diff;
}
if (DistanceSquared <= EpsilonSquared) {
Neighbors.push_back(P);
}
}
return Neighbors;
}
} // namespace
InstructionBenchmarkClustering::InstructionBenchmarkClustering()
: NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
llvm::Error InstructionBenchmarkClustering::validateAndSetup(
const std::vector<InstructionBenchmark> &Points) {
ClusterIdForPoint_.resize(Points.size());
// Mark erroneous measurements out.
// All points must have the same number of dimensions, in the same order.
const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr;
for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) {
const auto &Point = Points[P];
if (!Point.Error.empty()) {
ClusterIdForPoint_[P] = ClusterId::error();
ErrorCluster_.PointIndices.push_back(P);
continue;
}
const auto *CurMeasurement = &Point.Measurements;
if (LastMeasurement) {
if (LastMeasurement->size() != CurMeasurement->size()) {
return llvm::make_error<llvm::StringError>(
"inconsistent measurement dimensions",
llvm::inconvertibleErrorCode());
}
for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) {
if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) {
return llvm::make_error<llvm::StringError>(
"inconsistent measurement dimensions keys",
llvm::inconvertibleErrorCode());
}
}
}
LastMeasurement = CurMeasurement;
}
if (LastMeasurement) {
NumDimensions_ = LastMeasurement->size();
}
return llvm::Error::success();
}
void InstructionBenchmarkClustering::dbScan(
const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
const double EpsilonSquared) {
for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) {
if (!ClusterIdForPoint_[P].isUndef())
continue; // Previously processed in inner loop.
const auto Neighbors = rangeQuery(Points, P, EpsilonSquared);
if (Neighbors.size() + 1 < MinPts) { // Density check.
// The region around P is not dense enough to create a new cluster, mark
// as noise for now.
ClusterIdForPoint_[P] = ClusterId::noise();
continue;
}
// Create a new cluster, add P.
Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size()));
Cluster &CurrentCluster = Clusters_.back();
ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
CurrentCluster.PointIndices.push_back(P);
// Process P's neighbors.
std::unordered_set<size_t> ToProcess(Neighbors.begin(), Neighbors.end());
while (!ToProcess.empty()) {
// Retrieve a point from the set.
const size_t Q = *ToProcess.begin();
ToProcess.erase(Q);
if (ClusterIdForPoint_[Q].isNoise()) {
// Change noise point to border point.
ClusterIdForPoint_[Q] = CurrentCluster.Id;
CurrentCluster.PointIndices.push_back(Q);
continue;
}
if (!ClusterIdForPoint_[Q].isUndef()) {
continue; // Previously processed.
}
// Add Q to the current custer.
ClusterIdForPoint_[Q] = CurrentCluster.Id;
CurrentCluster.PointIndices.push_back(Q);
// And extend to the neighbors of Q if the region is dense enough.
const auto Neighbors = rangeQuery(Points, Q, EpsilonSquared);
if (Neighbors.size() + 1 >= MinPts) {
ToProcess.insert(Neighbors.begin(), Neighbors.end());
}
}
}
// Add noisy points to noise cluster.
for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) {
if (ClusterIdForPoint_[P].isNoise()) {
NoiseCluster_.PointIndices.push_back(P);
}
}
}
llvm::Expected<InstructionBenchmarkClustering>
InstructionBenchmarkClustering::create(
const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
const double Epsilon) {
InstructionBenchmarkClustering Clustering;
if (auto Error = Clustering.validateAndSetup(Points)) {
return std::move(Error);
}
if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) {
return Clustering; // Nothing to cluster.
}
Clustering.dbScan(Points, MinPts, Epsilon * Epsilon);
return Clustering;
}
} // namespace exegesis

View File

@ -0,0 +1,103 @@
//===-- Clustering.h --------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Utilities to compute benchmark result clusters.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#include "BenchmarkResult.h"
#include "llvm/Support/Error.h"
#include <vector>
namespace exegesis {
class InstructionBenchmarkClustering {
public:
// Clusters `Points` using DBSCAN with the given parameters. See the cc file
// for more explanations on the algorithm.
static llvm::Expected<InstructionBenchmarkClustering>
create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
double Epsilon);
class ClusterId {
public:
static ClusterId noise() { return ClusterId(kNoise); }
static ClusterId error() { return ClusterId(kError); }
static ClusterId makeValid(int Id) {
assert(Id >= 0);
return ClusterId(Id);
}
ClusterId() : Id_(kUndef) {}
bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
bool isValid() const { return Id_ >= 0; }
bool isUndef() const { return Id_ == kUndef; }
bool isNoise() const { return Id_ == kNoise; }
bool isError() const { return Id_ == kError; }
// Precondition: isValid().
size_t getId() const {
assert(isValid());
return static_cast<size_t>(Id_);
}
private:
explicit ClusterId(int Id) : Id_(Id) {}
static constexpr const int kUndef = -1;
static constexpr const int kNoise = -2;
static constexpr const int kError = -3;
int Id_;
};
struct Cluster {
Cluster() = delete;
explicit Cluster(const ClusterId &Id) : Id(Id) {}
const ClusterId Id;
// Indices of benchmarks within the cluster.
std::vector<int> PointIndices;
};
ClusterId getClusterIdForPoint(size_t P) const {
return ClusterIdForPoint_[P];
}
const Cluster &getCluster(ClusterId Id) const {
assert(!Id.isUndef() && "unlabeled cluster");
if (Id.isNoise()) {
return NoiseCluster_;
}
if (Id.isError()) {
return ErrorCluster_;
}
return Clusters_[Id.getId()];
}
const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
private:
InstructionBenchmarkClustering();
llvm::Error validateAndSetup(const std::vector<InstructionBenchmark> &Points);
void dbScan(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
double EpsilonSquared);
int NumDimensions_ = 0;
// ClusterForPoint_[P] is the cluster id for Points[P].
std::vector<ClusterId> ClusterIdForPoint_;
std::vector<Cluster> Clusters_;
Cluster NoiseCluster_;
Cluster ErrorCluster_;
};
} // namespace exegesis
#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H

View File

@ -12,6 +12,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(LLVMExegesisTests
BenchmarkResultTest.cpp
ClusteringTest.cpp
OperandGraphTest.cpp
PerfHelperTest.cpp
)

View File

@ -0,0 +1,86 @@
//===-- ClusteringTest.cpp --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Clustering.h"
#include "BenchmarkResult.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace exegesis {
namespace {
using testing::Field;
using testing::UnorderedElementsAre;
using testing::UnorderedElementsAreArray;
TEST(ClusteringTest, Clusters3D) {
std::vector<InstructionBenchmark> Points(6);
// Cluster around (x=0, y=1, z=2): points {0, 3}.
Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, "A"}};
Points[3].Measurements = {{"x", -0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
// Cluster around (x=1, y=1, z=2): points {1, 4}.
Points[1].Measurements = {{"x", 1.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
Points[4].Measurements = {{"x", 0.99, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
// Cluster around (x=0, y=0, z=0): points {5}, marked as noise.
Points[5].Measurements = {{"x", 0.0, ""}, {"y", 0.01, ""}, {"z", -0.02, ""}};
// Error cluster: points {2}
Points[2].Error = "oops";
auto HasPoints = [](const std::vector<int> &Indices) {
return Field(&InstructionBenchmarkClustering::Cluster::PointIndices,
UnorderedElementsAreArray(Indices));
};
auto Clustering = InstructionBenchmarkClustering::create(Points, 2, 0.25);
ASSERT_TRUE((bool)Clustering);
EXPECT_THAT(Clustering.get().getValidClusters(),
UnorderedElementsAre(HasPoints({0, 3}), HasPoints({1, 4})));
EXPECT_THAT(Clustering.get().getCluster(
InstructionBenchmarkClustering::ClusterId::noise()),
HasPoints({5}));
EXPECT_THAT(Clustering.get().getCluster(
InstructionBenchmarkClustering::ClusterId::error()),
HasPoints({2}));
EXPECT_EQ(Clustering.get().getClusterIdForPoint(2),
InstructionBenchmarkClustering::ClusterId::error());
EXPECT_EQ(Clustering.get().getClusterIdForPoint(5),
InstructionBenchmarkClustering::ClusterId::noise());
EXPECT_EQ(Clustering.get().getClusterIdForPoint(0),
Clustering.get().getClusterIdForPoint(3));
EXPECT_EQ(Clustering.get().getClusterIdForPoint(1),
Clustering.get().getClusterIdForPoint(4));
}
TEST(ClusteringTest, Clusters3D_InvalidSize) {
std::vector<InstructionBenchmark> Points(6);
Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
Points[1].Measurements = {{"y", 1.02, ""}, {"z", 1.98, ""}};
auto Error =
InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
ASSERT_TRUE((bool)Error);
consumeError(std::move(Error));
}
TEST(ClusteringTest, Clusters3D_InvalidOrder) {
std::vector<InstructionBenchmark> Points(6);
Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}};
Points[1].Measurements = {{"y", 1.02, ""}, {"x", 1.98, ""}};
auto Error =
InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
ASSERT_TRUE((bool)Error);
consumeError(std::move(Error));
}
} // namespace
} // namespace exegesis