Feat/backend comparison/persistence (#979)

* setting up

* wip

* persistence works

* cleanup

* clippy

* run checks

* Cleanup

* reverse json order

---------

Co-authored-by: nathaniel <nathaniel.simard.42@gmail.com>
Authored by Louis Fortier-Dubois on 2023-11-22 11:50:27 -05:00; committed by GitHub
parent 630044e96b
commit 17f59057d6
11 changed files with 184 additions and 33 deletions

@@ -28,6 +28,8 @@ burn = { path = "../burn" }
 derive-new = { workspace = true }
 rand = { workspace = true }
 burn-common = { path = "../burn-common", version = "0.11.0" }
+serde_json = { workspace = true }
+dirs = "5.0.1"

 [dev-dependencies]
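
(The two dependencies added here serve the new persistence module introduced later in this commit: serde_json reads and writes the benchmark cache file, and dirs locates the home directory under which that cache is stored.)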

@@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};

@@ -35,11 +36,13 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {
 #[allow(dead_code)]
 fn bench<B: Backend>(device: &B::Device) {
-    run_benchmark(BinaryBenchmark::<B, 3> {
+    let benchmark = BinaryBenchmark::<B, 3> {
         shape: [32, 512, 1024].into(),
         num_repeats: 10,
         device: device.clone(),
-    })
+    };
+
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {
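
Assembled from the post-image of the hunk above, the benchmark entry point now reads as follows; every bench in this commit adopts the same pattern, and the comments here are editorial:

    #[allow(dead_code)]
    fn bench<B: Backend>(device: &B::Device) {
        let benchmark = BinaryBenchmark::<B, 3> {
            shape: [32, 512, 1024].into(),
            num_repeats: 10,
            device: device.clone(),
        };

        // run_benchmark now returns a BenchmarkResult (see the burn-common diff below),
        // and Persistence::persist prints each result and appends it to the on-disk cache.
        Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
    }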

@@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use core::f64::consts::SQRT_2;

@@ -88,25 +89,33 @@ fn bench<B: Backend>(device: &B::Device) {
     let shape: Shape<D> = [32, 512, 2048].into();
     let num_repeats = 1;

     println!("Backend {}", B::name());
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    let reference_gelu = CustomGeluBenchmark::<B, D>::new(
         shape.clone(),
         num_repeats,
         device.clone(),
         GeluKind::Reference,
-    ));
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    );
+    let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
         shape.clone(),
         num_repeats,
         device.clone(),
         GeluKind::WithReferenceErf,
-    ));
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    );
+    let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
         shape,
         num_repeats,
         device.clone(),
         GeluKind::WithCustomErf,
-    ));
+    );
+
+    Persistence::persist::<B>(
+        vec![
+            run_benchmark(reference_gelu),
+            run_benchmark(reference_erf_gelu),
+            run_benchmark(custom_erf_gelu),
+        ],
+        device,
+    )
 }

 fn main() {

@@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;

@@ -76,9 +77,10 @@ fn bench<B: Backend>(device: &B::Device) {
     let to_benchmark = ToDataBenchmark::<B, D>::new(shape.clone(), num_repeats, device.clone());
     let from_benchmark = FromDataBenchmark::<B, D>::new(shape, num_repeats, device.clone());

     println!("Backend {}", B::name());
-    run_benchmark(to_benchmark);
-    run_benchmark(from_benchmark)
+    Persistence::persist::<B>(
+        vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
+        device,
+    )
 }

 fn main() {

@@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;

@@ -56,8 +57,7 @@ fn bench<B: Backend>(device: &B::Device) {
     let shape_rhs = [batch_size, k, n].into();
     let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, num_repeats, device.clone());
     println!("Backend {}", B::name());
-    run_benchmark(benchmark);
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {

@@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;

@@ -40,8 +41,7 @@ fn bench<B: Backend>(device: &B::Device) {
     let benchmark = UnaryBenchmark::<B, D>::new(shape, num_repeats, device.clone());

     println!("Backend {}", B::name());
-    run_benchmark(benchmark)
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {

@@ -1,3 +1,5 @@
+pub mod persistence;
+
 #[macro_export]
 macro_rules! bench_on_backend {
     () => {

@@ -0,0 +1,100 @@
+use std::{
+    collections::HashMap,
+    fs::{create_dir_all, File},
+    path::PathBuf,
+    time::Duration,
+};
+
+use burn::tensor::backend::Backend;
+use burn_common::benchmark::BenchmarkResult;
+use dirs;
+use serde_json;
+
+type BenchmarkCommitResults = HashMap<String, BenchmarkOpResults>;
+type BenchmarkOpResults = HashMap<String, BenchmarkBackendResults>;
+type BenchmarkBackendResults = HashMap<String, StampedBenchmarks>;
+type StampedBenchmarks = HashMap<u128, Vec<Duration>>;
+
+#[derive(Default)]
+pub struct Persistence {
+    results: BenchmarkCommitResults,
+}
+
+impl Persistence {
+    /// Updates the cached backend comparison json file with new benchmark results.
+    ///
+    /// The file has the following structure:
+    ///
+    /// {
+    ///   "GIT_COMMIT_HASH": {
+    ///     "BENCHMARK_NAME (OP + SHAPE)": {
+    ///       "BACKEND_NAME-DEVICE": {
+    ///         "TIMESTAMP": \[
+    ///           DURATIONS
+    ///         \]
+    ///       }
+    ///     }
+    ///   }
+    /// }
+    pub fn persist<B: Backend>(benches: Vec<BenchmarkResult>, device: &B::Device) {
+        for bench in benches.iter() {
+            println!("{}", bench);
+        }
+
+        let cache_file = dirs::home_dir()
+            .expect("Could not get home directory")
+            .join(".cache")
+            .join("backend-comparison")
+            .join("db.json");
+
+        let mut cache = Self::load(&cache_file);
+        cache.update::<B>(device, benches);
+        cache.save(&cache_file);
+
+        println!("Persisting to {:?}", cache_file);
+    }
+
+    /// Load the cache from disk.
+    fn load(path: &PathBuf) -> Self {
+        let results = match File::open(path) {
+            Ok(file) => serde_json::from_reader(file)
+                .expect("Should have parsed to BenchmarkCommitResults struct"),
+            Err(_) => HashMap::default(),
+        };
+
+        Self { results }
+    }
+
+    /// Save the cache on disk.
+    fn save(&self, path: &PathBuf) {
+        if let Some(parent) = path.parent() {
+            create_dir_all(parent).expect("Unable to create directory");
+        }
+        let file = File::create(path).expect("Unable to create backend comparison file");
+
+        serde_json::to_writer_pretty(file, &self.results)
+            .expect("Unable to write to backend comparison file");
+    }
+
+    /// Update the cache with the given [benchmark results](BenchmarkResult).
+    ///
+    /// Assumes only that the benches share the same backend and device.
+    /// It could run faster if we also assumed they share the same git hash.
+    fn update<B: Backend>(&mut self, device: &B::Device, benches: Vec<BenchmarkResult>) {
+        let backend_key = format!("{}-{:?}", B::name(), device);
+
+        for bench in benches {
+            let mut benchmark_op_results = self.results.remove(&bench.git_hash).unwrap_or_default();
+            let mut benchmark_backend_results =
+                benchmark_op_results.remove(&bench.name).unwrap_or_default();
+            let mut stamped_benchmarks = benchmark_backend_results
+                .remove(&backend_key)
+                .unwrap_or_default();
+
+            stamped_benchmarks.insert(bench.timestamp, bench.durations.durations);
+            benchmark_backend_results.insert(backend_key.clone(), stamped_benchmarks);
+            benchmark_op_results.insert(bench.name, benchmark_backend_results);
+            self.results.insert(bench.git_hash, benchmark_op_results);
+        }
+    }
+}
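
To make the documented layout concrete, here is a sketch of a db.json holding one persisted run. The angle-bracket keys mirror the placeholders in the doc comment above, and the two duration values are invented; note that serde_json writes map keys (including the u128 timestamp) as strings and serializes std::time::Duration as a secs/nanos pair:

    {
      "<GIT_COMMIT_HASH>": {
        "<BENCHMARK_NAME (OP + SHAPE)>": {
          "<BACKEND_NAME-DEVICE>": {
            "<TIMESTAMP_MILLIS>": [
              { "secs": 0, "nanos": 11932000 },
              { "secs": 0, "nanos": 11845000 }
            ]
          }
        }
      }
    }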

@@ -0,0 +1,2 @@
+mod base;
+pub use base::*;

@@ -9,11 +9,12 @@ use std::time::Instant;

 /// Results of a benchmark run.
 #[derive(new, Debug)]
-pub struct BenchmarkResult {
-    durations: Vec<Duration>,
+pub struct BenchmarkDurations {
+    /// All durations of the run, in the order they were benchmarked
+    pub durations: Vec<Duration>,
 }

-impl BenchmarkResult {
+impl BenchmarkDurations {
     /// Returns the median duration among all durations
     pub fn median_duration(&self) -> Duration {
         let mut sorted = self.durations.clone();

@@ -25,7 +26,7 @@ impl BenchmarkResult {
     }
 }

-impl Display for BenchmarkResult {
+impl Display for BenchmarkDurations {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let mean = self.mean_duration();
         let var = self

@@ -87,7 +88,7 @@ pub trait Benchmark {
     /// Wait for computations to be over
     fn sync(&self);

     /// Run the benchmark a number of times.
-    fn run(&self) -> BenchmarkResult {
+    fn run(&self) -> BenchmarkDurations {
         #[cfg(not(feature = "std"))]
         panic!("Attempting to run benchmark in a no-std environment");

@@ -114,14 +115,42 @@ pub trait Benchmark {
             durations.push(end - start);
         }

-        BenchmarkResult { durations }
+        BenchmarkDurations { durations }
     }
 }

+/// Result of a benchmark run, with metadata
+pub struct BenchmarkResult {
+    /// Individual results of the run
+    pub durations: BenchmarkDurations,
+    /// Time just before the run
+    pub timestamp: u128,
+    /// Git commit hash of the commit in which the run occurred
+    pub git_hash: String,
+    /// Name of the benchmark, normally with operation name and shapes
+    pub name: String,
+}
+
+impl Display for BenchmarkResult {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.write_str(
+            format!(
+                "
+        Timestamp: {}
+        Git Hash: {}
+        Benchmarking - {}{}
+        ",
+                self.timestamp, self.git_hash, self.name, self.durations
+            )
+            .as_str(),
+        )
+    }
+}
+
 #[cfg(feature = "std")]
 /// Runs the given benchmark on the device and prints result and information.
-pub fn run_benchmark<BM>(benchmark: BM)
+pub fn run_benchmark<BM>(benchmark: BM) -> BenchmarkResult
 where
     BM: Benchmark,
 {

@@ -130,12 +159,14 @@ where
         .unwrap()
         .as_millis();
     let output = std::process::Command::new("git")
-        .args(["rev-porse", "HEAD"])
+        .args(["rev-parse", "HEAD"])
         .output()
         .unwrap();
-    let git_hash = String::from_utf8(output.stdout).unwrap();
-    println!("Timestamp: {}", timestamp);
-    println!("Git Hash: {}", str::trim(&git_hash));
-    println!("Benchmarking - {}{}", benchmark.name(), benchmark.run());
+    let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
+
+    BenchmarkResult {
+        timestamp,
+        git_hash,
+        name: benchmark.name(),
+        durations: benchmark.run(),
+    }
 }
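
One consequence of the new signature, sketched below for an out-of-tree caller (the report helper is hypothetical): run_benchmark no longer prints anything itself, so callers must consume the returned BenchmarkResult, either through its Display impl or by handing it to Persistence::persist as the benches above do.

    use burn_common::benchmark::{run_benchmark, Benchmark, BenchmarkResult};

    // Hypothetical helper: run one benchmark and print its result with metadata.
    fn report<BM: Benchmark>(benchmark: BM) {
        let result: BenchmarkResult = run_benchmark(benchmark);
        println!("{}", result); // Display: timestamp, git hash, name, durations summary
    }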

@@ -4,7 +4,7 @@ use core::time::Duration;

 use alloc::boxed::Box;
 use alloc::string::ToString;
 use alloc::vec::Vec;
-use burn_common::benchmark::{Benchmark, BenchmarkResult};
+use burn_common::benchmark::{Benchmark, BenchmarkDurations};

 use crate::channel::ComputeChannel;
 use crate::client::ComputeClient;

@@ -50,7 +50,7 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
         let mut names = Vec::with_capacity(autotunables.len());

         // Run all autotune benchmarks
-        let results: Vec<BenchmarkResult> = autotunables
+        let results: Vec<BenchmarkDurations> = autotunables
             .into_iter()
             .map(|op| {
                 names.push(op.name().to_string());

@@ -78,11 +78,11 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
         &mut self,
         operation: Box<dyn AutotuneOperation>,
         client: &ComputeClient<S, C>,
-    ) -> BenchmarkResult {
+    ) -> BenchmarkDurations {
         TuneBenchmark::new(operation, client.clone()).run()
     }

-    fn find_fastest(&self, results: Vec<BenchmarkResult>) -> usize {
+    fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
         let mut smallest_duration = Duration::MAX;
         let mut fastest_tunable = None;