Feat/backend comparison/persistence (#979)

* setting up * wip * persistence works * cleanup * clippy * run checks * Cleanup * reverse json order --------- Co-authored-by: nathaniel <nathaniel.simard.42@gmail.com>
2023-11-22 11:50:27 -05:00 · 2023-11-22 11:50:27 -05:00 · 17f59057d6
parent 630044e96b
commit 17f59057d6
11 changed files with 184 additions and 33 deletions
--- a/backend-comparison/Cargo.toml
+++ b/backend-comparison/Cargo.toml
@ -28,6 +28,8 @@ burn = { path = "../burn" }
 derive-new = { workspace = true }
 rand = { workspace = true }
 burn-common = { path = "../burn-common", version = "0.11.0" }
+serde_json = { workspace = true }
+dirs = "5.0.1"

 [dev-dependencies]

--- a/backend-comparison/benches/binary.rs
+++ b/backend-comparison/benches/binary.rs
@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};

@ -35,11 +36,13 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {

 #[allow(dead_code)]
 fn bench<B: Backend>(device: &B::Device) {
-    run_benchmark(BinaryBenchmark::<B, 3> {
+    let benchmark = BinaryBenchmark::<B, 3> {
        shape: [32, 512, 1024].into(),
        num_repeats: 10,
        device: device.clone(),
-    })
+    };
+
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {
--- a/backend-comparison/benches/custom_gelu.rs
+++ b/backend-comparison/benches/custom_gelu.rs
@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use core::f64::consts::SQRT_2;
@ -88,25 +89,33 @@ fn bench<B: Backend>(device: &B::Device) {
    let shape: Shape<D> = [32, 512, 2048].into();
    let num_repeats = 1;

-    println!("Backend {}", B::name());
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    let reference_gelu = CustomGeluBenchmark::<B, D>::new(
        shape.clone(),
        num_repeats,
        device.clone(),
        GeluKind::Reference,
-    ));
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    );
+    let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
        shape.clone(),
        num_repeats,
        device.clone(),
        GeluKind::WithReferenceErf,
-    ));
-    run_benchmark(CustomGeluBenchmark::<B, D>::new(
+    );
+    let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
        shape,
        num_repeats,
        device.clone(),
        GeluKind::WithCustomErf,
-    ));
+    );
+
+    Persistence::persist::<B>(
+        vec![
+            run_benchmark(reference_gelu),
+            run_benchmark(reference_erf_gelu),
+            run_benchmark(custom_erf_gelu),
+        ],
+        device,
+    )
 }

 fn main() {
--- a/backend-comparison/benches/data.rs
+++ b/backend-comparison/benches/data.rs
@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;
@ -76,9 +77,10 @@ fn bench<B: Backend>(device: &B::Device) {
    let to_benchmark = ToDataBenchmark::<B, D>::new(shape.clone(), num_repeats, device.clone());
    let from_benchmark = FromDataBenchmark::<B, D>::new(shape, num_repeats, device.clone());

-    println!("Backend {}", B::name());
-    run_benchmark(to_benchmark);
-    run_benchmark(from_benchmark)
+    Persistence::persist::<B>(
+        vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
+        device,
+    )
 }

 fn main() {
--- a/backend-comparison/benches/matmul.rs
+++ b/backend-comparison/benches/matmul.rs
@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;
@ -56,8 +57,7 @@ fn bench<B: Backend>(device: &B::Device) {
    let shape_rhs = [batch_size, k, n].into();

    let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, num_repeats, device.clone());
-    println!("Backend {}", B::name());
-    run_benchmark(benchmark);
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {
--- a/backend-comparison/benches/unary.rs
+++ b/backend-comparison/benches/unary.rs
@ -1,3 +1,4 @@
+use backend_comparison::persistence::Persistence;
 use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
 use burn_common::benchmark::{run_benchmark, Benchmark};
 use derive_new::new;
@ -40,8 +41,7 @@ fn bench<B: Backend>(device: &B::Device) {

    let benchmark = UnaryBenchmark::<B, D>::new(shape, num_repeats, device.clone());

-    println!("Backend {}", B::name());
-    run_benchmark(benchmark)
+    Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
 }

 fn main() {
--- a/backend-comparison/src/lib.rs
+++ b/backend-comparison/src/lib.rs
@ -1,3 +1,5 @@
+pub mod persistence;
+
 #[macro_export]
 macro_rules! bench_on_backend {
    () => {
--- a/backend-comparison/src/persistence/base.rs
+++ b/backend-comparison/src/persistence/base.rs
@ -0,0 +1,100 @@
+use std::{
+    collections::HashMap,
+    fs::{create_dir_all, File},
+    path::PathBuf,
+    time::Duration,
+};
+
+use burn::tensor::backend::Backend;
+use burn_common::benchmark::BenchmarkResult;
+use dirs;
+use serde_json;
+
+type BenchmarkCommitResults = HashMap<String, BenchmarkOpResults>; // keyed by git commit hash
+type BenchmarkOpResults = HashMap<String, BenchmarkBackendResults>; // keyed by benchmark name
+type BenchmarkBackendResults = HashMap<String, StampedBenchmarks>; // keyed by backend and device
+type StampedBenchmarks = HashMap<u128, Vec<Duration>>; // keyed by run timestamp
+
+/// In-memory view of the json cache file holding every persisted benchmark run.
+#[derive(Default)]
+pub struct Persistence {
+    results: BenchmarkCommitResults,
+}
+
+impl Persistence {
+    /// Updates the cached backend comparison json file with new benchmark results.
+    ///
+    /// The file has the following structure:
+    ///
+    ///  {
+    ///    "GIT_COMMIT_HASH":
+    ///      {
+    ///        "BENCHMARK_NAME (OP + SHAPE)": {
+    ///          "BACKEND_NAME-DEVICE": {
+    ///            "TIMESTAMP": \[
+    ///              DURATIONS
+    ///           \]
+    ///         }
+    ///       }
+    ///    }
+    ///  }
+    pub fn persist<B: Backend>(benches: Vec<BenchmarkResult>, device: &B::Device) {
+        for bench in &benches {
+            println!("{}", bench);
+        }
+        let cache_file = dirs::home_dir()
+            .expect("Could not get home directory")
+            .join(".cache")
+            .join("backend-comparison")
+            .join("db.json");
+
+        let mut cache = Self::load(&cache_file);
+        cache.update::<B>(device, benches);
+        cache.save(&cache_file);
+        println!("Persisting to {:?}", cache_file);
+    }
+
+    /// Load the cache from disk. A missing file yields an empty cache; any other
+    /// open or parse failure panics so `save` cannot clobber existing results.
+    fn load(path: &std::path::Path) -> Self {
+        let results = match File::open(path) {
+            Ok(file) => serde_json::from_reader(std::io::BufReader::new(file))
+                .expect("Should have parsed to BenchmarkCommitResults struct"),
+            Err(err) if err.kind() == std::io::ErrorKind::NotFound => HashMap::default(),
+            Err(err) => panic!("Unable to open backend comparison file: {}", err),
+        };
+        Self { results }
+    }
+
+    /// Save the cache on disk through a buffered writer, flushed explicitly
+    /// because errors raised while flushing on drop would be ignored.
+    fn save(&self, path: &std::path::Path) {
+        if let Some(parent) = path.parent() {
+            create_dir_all(parent).expect("Unable to create directory");
+        }
+        let file = File::create(path).expect("Unable to create backend comparison file");
+        let mut writer = std::io::BufWriter::new(file);
+        serde_json::to_writer_pretty(&mut writer, &self.results)
+            .expect("Unable to write to backend comparison file");
+        std::io::Write::flush(&mut writer).expect("Unable to write to backend comparison file");
+    }
+
+    /// Update the cache with the given [benchmark results](BenchmarkResult).
+    ///
+    /// Assumes only that benches share the same backend and device.
+    /// It could run faster if we assumed they have the same git hash.
+    fn update<B: Backend>(&mut self, device: &B::Device, benches: Vec<BenchmarkResult>) {
+        let backend_key = format!("{}-{:?}", B::name(), device);
+        for bench in benches {
+            // Walk commit -> benchmark name -> backend, creating missing levels on the way.
+            self.results
+                .entry(bench.git_hash)
+                .or_default()
+                .entry(bench.name)
+                .or_default()
+                .entry(backend_key.clone())
+                .or_default()
+                .insert(bench.timestamp, bench.durations.durations);
+        }
+    }
+}
--- a/backend-comparison/src/persistence/mod.rs
+++ b/backend-comparison/src/persistence/mod.rs
@ -0,0 +1,2 @@
+mod base;
+pub use base::*;
--- a/burn-common/src/benchmark.rs
+++ b/burn-common/src/benchmark.rs
@ -9,11 +9,12 @@ use std::time::Instant;

 /// Results of a benchmark run.
 #[derive(new, Debug)]
-pub struct BenchmarkResult {
-    durations: Vec<Duration>,
+pub struct BenchmarkDurations {
+    /// All durations of the run, in the order they were benchmarked
+    pub durations: Vec<Duration>,
 }

-impl BenchmarkResult {
+impl BenchmarkDurations {
    /// Returns the median duration among all durations
    pub fn median_duration(&self) -> Duration {
        let mut sorted = self.durations.clone();
@ -25,7 +26,7 @@ impl BenchmarkResult {
    }
 }

-impl Display for BenchmarkResult {
+impl Display for BenchmarkDurations {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mean = self.mean_duration();
        let var = self
@ -87,7 +88,7 @@ pub trait Benchmark {
    /// Wait for computations to be over
    fn sync(&self);
    /// Run the benchmark a number of times.
-    fn run(&self) -> BenchmarkResult {
+    fn run(&self) -> BenchmarkDurations {
        #[cfg(not(feature = "std"))]
        panic!("Attempting to run benchmark in a no-std environment");

@ -114,14 +115,42 @@ pub trait Benchmark {
                durations.push(end - start);
            }

-            BenchmarkResult { durations }
+            BenchmarkDurations { durations }
        }
    }
 }

+/// Result of a benchmark run, together with the metadata identifying that run
+pub struct BenchmarkResult {
+    /// Individual durations measured during the run
+    pub durations: BenchmarkDurations,
+    /// Time just before the run (milliseconds, judging by `as_millis` — TODO confirm)
+    pub timestamp: u128,
+    /// Hash of the git commit on which the run occurred
+    pub git_hash: String,
+    /// Name of the benchmark, normally with operation name and shapes
+    pub name: String,
+}
+
+impl Display for BenchmarkResult {
+    /// Writes the timestamp, git hash and name, then the `Display` output of the durations.
+    /// Formats straight into `f` instead of building an intermediate `String` first.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "
+        Timestamp: {}
+        Git Hash: {}
+        Benchmarking - {}{}
+        ",
+            self.timestamp, self.git_hash, self.name, self.durations
+        )
+    }
+}
+
 #[cfg(feature = "std")]
 /// Runs the given benchmark on the device and returns its result with metadata.
-pub fn run_benchmark<BM>(benchmark: BM)
+pub fn run_benchmark<BM>(benchmark: BM) -> BenchmarkResult
 where
    BM: Benchmark,
 {
@ -130,12 +159,14 @@ where
        .unwrap()
        .as_millis();
    let output = std::process::Command::new("git")
-        .args(["rev-porse", "HEAD"])
+        .args(["rev-parse", "HEAD"])
        .output()
        .unwrap();
-    let git_hash = String::from_utf8(output.stdout).unwrap();
-
-    println!("Timestamp: {}", timestamp);
-    println!("Git Hash: {}", str::trim(&git_hash));
-    println!("Benchmarking - {}{}", benchmark.name(), benchmark.run());
+    let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
+    BenchmarkResult {
+        timestamp,
+        git_hash,
+        name: benchmark.name(),
+        durations: benchmark.run(),
+    }
 }
--- a/burn-compute/src/tune/tuner.rs
+++ b/burn-compute/src/tune/tuner.rs
@ -4,7 +4,7 @@ use core::time::Duration;
 use alloc::boxed::Box;
 use alloc::string::ToString;
 use alloc::vec::Vec;
-use burn_common::benchmark::{Benchmark, BenchmarkResult};
+use burn_common::benchmark::{Benchmark, BenchmarkDurations};

 use crate::channel::ComputeChannel;
 use crate::client::ComputeClient;
@ -50,7 +50,7 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
        let mut names = Vec::with_capacity(autotunables.len());

        // Run all autotune benchmarks
-        let results: Vec<BenchmarkResult> = autotunables
+        let results: Vec<BenchmarkDurations> = autotunables
            .into_iter()
            .map(|op| {
                names.push(op.name().to_string());
@ -78,11 +78,11 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
        &mut self,
        operation: Box<dyn AutotuneOperation>,
        client: &ComputeClient<S, C>,
-    ) -> BenchmarkResult {
+    ) -> BenchmarkDurations {
        TuneBenchmark::new(operation, client.clone()).run()
    }

-    fn find_fastest(&self, results: Vec<BenchmarkResult>) -> usize {
+    fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
        let mut smallest_duration = Duration::MAX;
        let mut fastest_tunable = None;