From 17f59057d636393d3e41cd326d6b2a6d49fe2f80 Mon Sep 17 00:00:00 2001 From: Louis Fortier-Dubois Date: Wed, 22 Nov 2023 11:50:27 -0500 Subject: [PATCH] Feat/backend comparison/persistence (#979) * setting up * wip * persistence works * cleanup * clippy * run checks * Cleanup * reverse json order --------- Co-authored-by: nathaniel --- backend-comparison/Cargo.toml | 2 + backend-comparison/benches/binary.rs | 7 +- backend-comparison/benches/custom_gelu.rs | 23 +++-- backend-comparison/benches/data.rs | 8 +- backend-comparison/benches/matmul.rs | 4 +- backend-comparison/benches/unary.rs | 4 +- backend-comparison/src/lib.rs | 2 + backend-comparison/src/persistence/base.rs | 100 +++++++++++++++++++++ backend-comparison/src/persistence/mod.rs | 2 + burn-common/src/benchmark.rs | 57 +++++++++--- burn-compute/src/tune/tuner.rs | 8 +- 11 files changed, 184 insertions(+), 33 deletions(-) create mode 100644 backend-comparison/src/persistence/base.rs create mode 100644 backend-comparison/src/persistence/mod.rs diff --git a/backend-comparison/Cargo.toml b/backend-comparison/Cargo.toml index 960fadc2b..2aeed8920 100644 --- a/backend-comparison/Cargo.toml +++ b/backend-comparison/Cargo.toml @@ -28,6 +28,8 @@ burn = { path = "../burn" } derive-new = { workspace = true } rand = { workspace = true } burn-common = { path = "../burn-common", version = "0.11.0" } +serde_json = { workspace = true } +dirs = "5.0.1" [dev-dependencies] diff --git a/backend-comparison/benches/binary.rs b/backend-comparison/benches/binary.rs index cb5b3264f..b06a1ccd4 100644 --- a/backend-comparison/benches/binary.rs +++ b/backend-comparison/benches/binary.rs @@ -1,3 +1,4 @@ +use backend_comparison::persistence::Persistence; use burn::tensor::{backend::Backend, Distribution, Shape, Tensor}; use burn_common::benchmark::{run_benchmark, Benchmark}; @@ -35,11 +36,13 @@ impl Benchmark for BinaryBenchmark { #[allow(dead_code)] fn bench(device: &B::Device) { - run_benchmark(BinaryBenchmark:: { + let benchmark = BinaryBenchmark:: { shape: [32, 512, 1024].into(), num_repeats: 10, device: device.clone(), - }) + }; + + Persistence::persist::(vec![run_benchmark(benchmark)], device) } fn main() { diff --git a/backend-comparison/benches/custom_gelu.rs b/backend-comparison/benches/custom_gelu.rs index 71db646a9..65b3ea30a 100644 --- a/backend-comparison/benches/custom_gelu.rs +++ b/backend-comparison/benches/custom_gelu.rs @@ -1,3 +1,4 @@ +use backend_comparison::persistence::Persistence; use burn::tensor::{backend::Backend, Distribution, Shape, Tensor}; use burn_common::benchmark::{run_benchmark, Benchmark}; use core::f64::consts::SQRT_2; @@ -88,25 +89,33 @@ fn bench(device: &B::Device) { let shape: Shape = [32, 512, 2048].into(); let num_repeats = 1; - println!("Backend {}", B::name()); - run_benchmark(CustomGeluBenchmark::::new( + let reference_gelu = CustomGeluBenchmark::::new( shape.clone(), num_repeats, device.clone(), GeluKind::Reference, - )); - run_benchmark(CustomGeluBenchmark::::new( + ); + let reference_erf_gelu = CustomGeluBenchmark::::new( shape.clone(), num_repeats, device.clone(), GeluKind::WithReferenceErf, - )); - run_benchmark(CustomGeluBenchmark::::new( + ); + let custom_erf_gelu = CustomGeluBenchmark::::new( shape, num_repeats, device.clone(), GeluKind::WithCustomErf, - )); + ); + + Persistence::persist::( + vec![ + run_benchmark(reference_gelu), + run_benchmark(reference_erf_gelu), + run_benchmark(custom_erf_gelu), + ], + device, + ) } fn main() { diff --git a/backend-comparison/benches/data.rs b/backend-comparison/benches/data.rs index e9379b393..571a08b23 100644 --- a/backend-comparison/benches/data.rs +++ b/backend-comparison/benches/data.rs @@ -1,3 +1,4 @@ +use backend_comparison::persistence::Persistence; use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor}; use burn_common::benchmark::{run_benchmark, Benchmark}; use derive_new::new; @@ -76,9 +77,10 @@ fn bench(device: &B::Device) { let to_benchmark = ToDataBenchmark::::new(shape.clone(), num_repeats, device.clone()); let from_benchmark = FromDataBenchmark::::new(shape, num_repeats, device.clone()); - println!("Backend {}", B::name()); - run_benchmark(to_benchmark); - run_benchmark(from_benchmark) + Persistence::persist::( + vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)], + device, + ) } fn main() { diff --git a/backend-comparison/benches/matmul.rs b/backend-comparison/benches/matmul.rs index 7574e2197..24f39eeed 100644 --- a/backend-comparison/benches/matmul.rs +++ b/backend-comparison/benches/matmul.rs @@ -1,3 +1,4 @@ +use backend_comparison::persistence::Persistence; use burn::tensor::{backend::Backend, Distribution, Shape, Tensor}; use burn_common::benchmark::{run_benchmark, Benchmark}; use derive_new::new; @@ -56,8 +57,7 @@ fn bench(device: &B::Device) { let shape_rhs = [batch_size, k, n].into(); let benchmark = MatmulBenchmark::::new(shape_lhs, shape_rhs, num_repeats, device.clone()); - println!("Backend {}", B::name()); - run_benchmark(benchmark); + Persistence::persist::(vec![run_benchmark(benchmark)], device) } fn main() { diff --git a/backend-comparison/benches/unary.rs b/backend-comparison/benches/unary.rs index 4befcdd2a..924d7a3b3 100644 --- a/backend-comparison/benches/unary.rs +++ b/backend-comparison/benches/unary.rs @@ -1,3 +1,4 @@ +use backend_comparison::persistence::Persistence; use burn::tensor::{backend::Backend, Distribution, Shape, Tensor}; use burn_common::benchmark::{run_benchmark, Benchmark}; use derive_new::new; @@ -40,8 +41,7 @@ fn bench(device: &B::Device) { let benchmark = UnaryBenchmark::::new(shape, num_repeats, device.clone()); - println!("Backend {}", B::name()); - run_benchmark(benchmark) + Persistence::persist::(vec![run_benchmark(benchmark)], device) } fn main() { diff --git a/backend-comparison/src/lib.rs b/backend-comparison/src/lib.rs index 065b50f41..77ac9cf44 100644 --- a/backend-comparison/src/lib.rs +++ b/backend-comparison/src/lib.rs @@ -1,3 +1,5 @@ +pub mod persistence; + #[macro_export] macro_rules! bench_on_backend { () => { diff --git a/backend-comparison/src/persistence/base.rs b/backend-comparison/src/persistence/base.rs new file mode 100644 index 000000000..470f5dcb8 --- /dev/null +++ b/backend-comparison/src/persistence/base.rs @@ -0,0 +1,100 @@ +use std::{ + collections::HashMap, + fs::{create_dir_all, File}, + path::PathBuf, + time::Duration, +}; + +use burn::tensor::backend::Backend; +use burn_common::benchmark::BenchmarkResult; +use dirs; +use serde_json; + +type BenchmarkCommitResults = HashMap; +type BenchmarkOpResults = HashMap; +type BenchmarkBackendResults = HashMap; +type StampedBenchmarks = HashMap>; + +#[derive(Default)] +pub struct Persistence { + results: BenchmarkCommitResults, +} + +impl Persistence { + /// Updates the cached backend comparison json file with new benchmarks results. + /// + /// The file has the following structure: + /// + /// { + /// "GIT_COMMIT_HASH": + /// { + /// "BENCHMARK_NAME (OP + SHAPE)": { + /// "BACKEND_NAME-DEVICE": { + /// "TIMESTAMP": \[ + /// DURATIONS + /// \] + /// } + /// } + /// } + /// } + pub fn persist(benches: Vec, device: &B::Device) { + for bench in benches.iter() { + println!("{}", bench); + } + let cache_file = dirs::home_dir() + .expect("Could not get home directory") + .join(".cache") + .join("backend-comparison") + .join("db.json"); + + let mut cache = Self::load(&cache_file); + cache.update::(device, benches); + cache.save(&cache_file); + println!("Persisting to {:?}", cache_file); + } + + /// Load the cache from disk. + fn load(path: &PathBuf) -> Self { + let results = match File::open(path) { + Ok(file) => serde_json::from_reader(file) + .expect("Should have parsed to BenchmarkCommitResults struct"), + Err(_) => HashMap::default(), + }; + + Self { results } + } + + /// Save the cache on disk. + fn save(&self, path: &PathBuf) { + if let Some(parent) = path.parent() { + create_dir_all(parent).expect("Unable to create directory"); + } + let file = File::create(path).expect("Unable to create backend comparison file"); + + serde_json::to_writer_pretty(file, &self.results) + .expect("Unable to write to backend comparison file"); + } + + /// Update the cache with the given [benchmark results](BenchmarkResult). + /// + /// Assumes only that benches share the same backend and device. + /// It could run faster if we assumed they have the same git hash + fn update(&mut self, device: &B::Device, benches: Vec) { + let backend_key = format!("{}-{:?}", B::name(), device); + + for bench in benches { + let mut benchmark_op_results = self.results.remove(&bench.git_hash).unwrap_or_default(); + let mut benchmark_backend_results = + benchmark_op_results.remove(&bench.name).unwrap_or_default(); + + let mut stamped_benchmarks = benchmark_backend_results + .remove(&backend_key) + .unwrap_or_default(); + + stamped_benchmarks.insert(bench.timestamp, bench.durations.durations); + benchmark_backend_results.insert(backend_key.clone(), stamped_benchmarks); + benchmark_op_results.insert(bench.name, benchmark_backend_results); + self.results.insert(bench.git_hash, benchmark_op_results); + } + } +} diff --git a/backend-comparison/src/persistence/mod.rs b/backend-comparison/src/persistence/mod.rs new file mode 100644 index 000000000..096c94ead --- /dev/null +++ b/backend-comparison/src/persistence/mod.rs @@ -0,0 +1,2 @@ +mod base; +pub use base::*; diff --git a/burn-common/src/benchmark.rs b/burn-common/src/benchmark.rs index a4abba2f3..f843bf8c9 100644 --- a/burn-common/src/benchmark.rs +++ b/burn-common/src/benchmark.rs @@ -9,11 +9,12 @@ use std::time::Instant; /// Results of a benchmark run. #[derive(new, Debug)] -pub struct BenchmarkResult { - durations: Vec, +pub struct BenchmarkDurations { + /// All durations of the run, in the order they were benchmarked + pub durations: Vec, } -impl BenchmarkResult { +impl BenchmarkDurations { /// Returns the median duration among all durations pub fn median_duration(&self) -> Duration { let mut sorted = self.durations.clone(); @@ -25,7 +26,7 @@ impl BenchmarkResult { } } -impl Display for BenchmarkResult { +impl Display for BenchmarkDurations { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mean = self.mean_duration(); let var = self @@ -87,7 +88,7 @@ pub trait Benchmark { /// Wait for computations to be over fn sync(&self); /// Run the benchmark a number of times. - fn run(&self) -> BenchmarkResult { + fn run(&self) -> BenchmarkDurations { #[cfg(not(feature = "std"))] panic!("Attempting to run benchmark in a no-std environment"); @@ -114,14 +115,42 @@ pub trait Benchmark { durations.push(end - start); } - BenchmarkResult { durations } + BenchmarkDurations { durations } } } } +/// Result of a benchmark run, with metadata +pub struct BenchmarkResult { + /// Individual results of the run + pub durations: BenchmarkDurations, + /// Time just before the run + pub timestamp: u128, + /// Git commit hash of the commit in which the run occurred + pub git_hash: String, + /// Name of the benchmark, normally with operation name and shapes + pub name: String, +} + +impl Display for BenchmarkResult { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str( + format!( + " + Timestamp: {} + Git Hash: {} + Benchmarking - {}{} + ", + self.timestamp, self.git_hash, self.name, self.durations + ) + .as_str(), + ) + } +} + #[cfg(feature = "std")] /// Runs the given benchmark on the device and prints result and information. -pub fn run_benchmark(benchmark: BM) +pub fn run_benchmark(benchmark: BM) -> BenchmarkResult where BM: Benchmark, { @@ -130,12 +159,14 @@ where .unwrap() .as_millis(); let output = std::process::Command::new("git") - .args(["rev-porse", "HEAD"]) + .args(["rev-parse", "HEAD"]) .output() .unwrap(); - let git_hash = String::from_utf8(output.stdout).unwrap(); - - println!("Timestamp: {}", timestamp); - println!("Git Hash: {}", str::trim(&git_hash)); - println!("Benchmarking - {}{}", benchmark.name(), benchmark.run()); + let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string(); + BenchmarkResult { + timestamp, + git_hash, + name: benchmark.name(), + durations: benchmark.run(), + } } diff --git a/burn-compute/src/tune/tuner.rs b/burn-compute/src/tune/tuner.rs index c9a9afeb8..d18e1294a 100644 --- a/burn-compute/src/tune/tuner.rs +++ b/burn-compute/src/tune/tuner.rs @@ -4,7 +4,7 @@ use core::time::Duration; use alloc::boxed::Box; use alloc::string::ToString; use alloc::vec::Vec; -use burn_common::benchmark::{Benchmark, BenchmarkResult}; +use burn_common::benchmark::{Benchmark, BenchmarkDurations}; use crate::channel::ComputeChannel; use crate::client::ComputeClient; @@ -50,7 +50,7 @@ impl> Tuner { let mut names = Vec::with_capacity(autotunables.len()); // Run all autotune benchmarks - let results: Vec = autotunables + let results: Vec = autotunables .into_iter() .map(|op| { names.push(op.name().to_string()); @@ -78,11 +78,11 @@ impl> Tuner { &mut self, operation: Box, client: &ComputeClient, - ) -> BenchmarkResult { + ) -> BenchmarkDurations { TuneBenchmark::new(operation, client.clone()).run() } - fn find_fastest(&self, results: Vec) -> usize { + fn find_fastest(&self, results: Vec) -> usize { let mut smallest_duration = Duration::MAX; let mut fastest_tunable = None;