mirror of https://github.com/tracel-ai/burn.git
Feat/backend comparison/persistence (#979)
* setting up * wip * persistence works * cleanup * clippy * run checks * Cleanup * reverse json order --------- Co-authored-by: nathaniel <nathaniel.simard.42@gmail.com>
This commit is contained in:
parent
630044e96b
commit
17f59057d6
|
@ -28,6 +28,8 @@ burn = { path = "../burn" }
|
|||
derive-new = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
burn-common = { path = "../burn-common", version = "0.11.0" }
|
||||
serde_json = { workspace = true }
|
||||
dirs = "5.0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use backend_comparison::persistence::Persistence;
|
||||
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
|
||||
use burn_common::benchmark::{run_benchmark, Benchmark};
|
||||
|
||||
|
@ -35,11 +36,13 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {
|
|||
|
||||
#[allow(dead_code)]
|
||||
fn bench<B: Backend>(device: &B::Device) {
|
||||
run_benchmark(BinaryBenchmark::<B, 3> {
|
||||
let benchmark = BinaryBenchmark::<B, 3> {
|
||||
shape: [32, 512, 1024].into(),
|
||||
num_repeats: 10,
|
||||
device: device.clone(),
|
||||
})
|
||||
};
|
||||
|
||||
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use backend_comparison::persistence::Persistence;
|
||||
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
|
||||
use burn_common::benchmark::{run_benchmark, Benchmark};
|
||||
use core::f64::consts::SQRT_2;
|
||||
|
@ -88,25 +89,33 @@ fn bench<B: Backend>(device: &B::Device) {
|
|||
let shape: Shape<D> = [32, 512, 2048].into();
|
||||
let num_repeats = 1;
|
||||
|
||||
println!("Backend {}", B::name());
|
||||
run_benchmark(CustomGeluBenchmark::<B, D>::new(
|
||||
let reference_gelu = CustomGeluBenchmark::<B, D>::new(
|
||||
shape.clone(),
|
||||
num_repeats,
|
||||
device.clone(),
|
||||
GeluKind::Reference,
|
||||
));
|
||||
run_benchmark(CustomGeluBenchmark::<B, D>::new(
|
||||
);
|
||||
let reference_erf_gelu = CustomGeluBenchmark::<B, D>::new(
|
||||
shape.clone(),
|
||||
num_repeats,
|
||||
device.clone(),
|
||||
GeluKind::WithReferenceErf,
|
||||
));
|
||||
run_benchmark(CustomGeluBenchmark::<B, D>::new(
|
||||
);
|
||||
let custom_erf_gelu = CustomGeluBenchmark::<B, D>::new(
|
||||
shape,
|
||||
num_repeats,
|
||||
device.clone(),
|
||||
GeluKind::WithCustomErf,
|
||||
));
|
||||
);
|
||||
|
||||
Persistence::persist::<B>(
|
||||
vec![
|
||||
run_benchmark(reference_gelu),
|
||||
run_benchmark(reference_erf_gelu),
|
||||
run_benchmark(custom_erf_gelu),
|
||||
],
|
||||
device,
|
||||
)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use backend_comparison::persistence::Persistence;
|
||||
use burn::tensor::{backend::Backend, Data, Distribution, Shape, Tensor};
|
||||
use burn_common::benchmark::{run_benchmark, Benchmark};
|
||||
use derive_new::new;
|
||||
|
@ -76,9 +77,10 @@ fn bench<B: Backend>(device: &B::Device) {
|
|||
let to_benchmark = ToDataBenchmark::<B, D>::new(shape.clone(), num_repeats, device.clone());
|
||||
let from_benchmark = FromDataBenchmark::<B, D>::new(shape, num_repeats, device.clone());
|
||||
|
||||
println!("Backend {}", B::name());
|
||||
run_benchmark(to_benchmark);
|
||||
run_benchmark(from_benchmark)
|
||||
Persistence::persist::<B>(
|
||||
vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
|
||||
device,
|
||||
)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use backend_comparison::persistence::Persistence;
|
||||
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
|
||||
use burn_common::benchmark::{run_benchmark, Benchmark};
|
||||
use derive_new::new;
|
||||
|
@ -56,8 +57,7 @@ fn bench<B: Backend>(device: &B::Device) {
|
|||
let shape_rhs = [batch_size, k, n].into();
|
||||
|
||||
let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, num_repeats, device.clone());
|
||||
println!("Backend {}", B::name());
|
||||
run_benchmark(benchmark);
|
||||
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use backend_comparison::persistence::Persistence;
|
||||
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};
|
||||
use burn_common::benchmark::{run_benchmark, Benchmark};
|
||||
use derive_new::new;
|
||||
|
@ -40,8 +41,7 @@ fn bench<B: Backend>(device: &B::Device) {
|
|||
|
||||
let benchmark = UnaryBenchmark::<B, D>::new(shape, num_repeats, device.clone());
|
||||
|
||||
println!("Backend {}", B::name());
|
||||
run_benchmark(benchmark)
|
||||
Persistence::persist::<B>(vec![run_benchmark(benchmark)], device)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
pub mod persistence;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! bench_on_backend {
|
||||
() => {
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
fs::{create_dir_all, File},
|
||||
path::PathBuf,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use burn::tensor::backend::Backend;
|
||||
use burn_common::benchmark::BenchmarkResult;
|
||||
use dirs;
|
||||
use serde_json;
|
||||
|
||||
type BenchmarkCommitResults = HashMap<String, BenchmarkOpResults>;
|
||||
type BenchmarkOpResults = HashMap<String, BenchmarkBackendResults>;
|
||||
type BenchmarkBackendResults = HashMap<String, StampedBenchmarks>;
|
||||
type StampedBenchmarks = HashMap<u128, Vec<Duration>>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Persistence {
|
||||
results: BenchmarkCommitResults,
|
||||
}
|
||||
|
||||
impl Persistence {
|
||||
/// Updates the cached backend comparison json file with new benchmarks results.
|
||||
///
|
||||
/// The file has the following structure:
|
||||
///
|
||||
/// {
|
||||
/// "GIT_COMMIT_HASH":
|
||||
/// {
|
||||
/// "BENCHMARK_NAME (OP + SHAPE)": {
|
||||
/// "BACKEND_NAME-DEVICE": {
|
||||
/// "TIMESTAMP": \[
|
||||
/// DURATIONS
|
||||
/// \]
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
pub fn persist<B: Backend>(benches: Vec<BenchmarkResult>, device: &B::Device) {
|
||||
for bench in benches.iter() {
|
||||
println!("{}", bench);
|
||||
}
|
||||
let cache_file = dirs::home_dir()
|
||||
.expect("Could not get home directory")
|
||||
.join(".cache")
|
||||
.join("backend-comparison")
|
||||
.join("db.json");
|
||||
|
||||
let mut cache = Self::load(&cache_file);
|
||||
cache.update::<B>(device, benches);
|
||||
cache.save(&cache_file);
|
||||
println!("Persisting to {:?}", cache_file);
|
||||
}
|
||||
|
||||
/// Load the cache from disk.
|
||||
fn load(path: &PathBuf) -> Self {
|
||||
let results = match File::open(path) {
|
||||
Ok(file) => serde_json::from_reader(file)
|
||||
.expect("Should have parsed to BenchmarkCommitResults struct"),
|
||||
Err(_) => HashMap::default(),
|
||||
};
|
||||
|
||||
Self { results }
|
||||
}
|
||||
|
||||
/// Save the cache on disk.
|
||||
fn save(&self, path: &PathBuf) {
|
||||
if let Some(parent) = path.parent() {
|
||||
create_dir_all(parent).expect("Unable to create directory");
|
||||
}
|
||||
let file = File::create(path).expect("Unable to create backend comparison file");
|
||||
|
||||
serde_json::to_writer_pretty(file, &self.results)
|
||||
.expect("Unable to write to backend comparison file");
|
||||
}
|
||||
|
||||
/// Update the cache with the given [benchmark results](BenchmarkResult).
|
||||
///
|
||||
/// Assumes only that benches share the same backend and device.
|
||||
/// It could run faster if we assumed they have the same git hash
|
||||
fn update<B: Backend>(&mut self, device: &B::Device, benches: Vec<BenchmarkResult>) {
|
||||
let backend_key = format!("{}-{:?}", B::name(), device);
|
||||
|
||||
for bench in benches {
|
||||
let mut benchmark_op_results = self.results.remove(&bench.git_hash).unwrap_or_default();
|
||||
let mut benchmark_backend_results =
|
||||
benchmark_op_results.remove(&bench.name).unwrap_or_default();
|
||||
|
||||
let mut stamped_benchmarks = benchmark_backend_results
|
||||
.remove(&backend_key)
|
||||
.unwrap_or_default();
|
||||
|
||||
stamped_benchmarks.insert(bench.timestamp, bench.durations.durations);
|
||||
benchmark_backend_results.insert(backend_key.clone(), stamped_benchmarks);
|
||||
benchmark_op_results.insert(bench.name, benchmark_backend_results);
|
||||
self.results.insert(bench.git_hash, benchmark_op_results);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
mod base;
|
||||
pub use base::*;
|
|
@ -9,11 +9,12 @@ use std::time::Instant;
|
|||
|
||||
/// Results of a benchmark run.
|
||||
#[derive(new, Debug)]
|
||||
pub struct BenchmarkResult {
|
||||
durations: Vec<Duration>,
|
||||
pub struct BenchmarkDurations {
|
||||
/// All durations of the run, in the order they were benchmarked
|
||||
pub durations: Vec<Duration>,
|
||||
}
|
||||
|
||||
impl BenchmarkResult {
|
||||
impl BenchmarkDurations {
|
||||
/// Returns the median duration among all durations
|
||||
pub fn median_duration(&self) -> Duration {
|
||||
let mut sorted = self.durations.clone();
|
||||
|
@ -25,7 +26,7 @@ impl BenchmarkResult {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for BenchmarkResult {
|
||||
impl Display for BenchmarkDurations {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
let mean = self.mean_duration();
|
||||
let var = self
|
||||
|
@ -87,7 +88,7 @@ pub trait Benchmark {
|
|||
/// Wait for computations to be over
|
||||
fn sync(&self);
|
||||
/// Run the benchmark a number of times.
|
||||
fn run(&self) -> BenchmarkResult {
|
||||
fn run(&self) -> BenchmarkDurations {
|
||||
#[cfg(not(feature = "std"))]
|
||||
panic!("Attempting to run benchmark in a no-std environment");
|
||||
|
||||
|
@ -114,14 +115,42 @@ pub trait Benchmark {
|
|||
durations.push(end - start);
|
||||
}
|
||||
|
||||
BenchmarkResult { durations }
|
||||
BenchmarkDurations { durations }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a benchmark run, with metadata
|
||||
pub struct BenchmarkResult {
|
||||
/// Individual results of the run
|
||||
pub durations: BenchmarkDurations,
|
||||
/// Time just before the run
|
||||
pub timestamp: u128,
|
||||
/// Git commit hash of the commit in which the run occurred
|
||||
pub git_hash: String,
|
||||
/// Name of the benchmark, normally with operation name and shapes
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
impl Display for BenchmarkResult {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
f.write_str(
|
||||
format!(
|
||||
"
|
||||
Timestamp: {}
|
||||
Git Hash: {}
|
||||
Benchmarking - {}{}
|
||||
",
|
||||
self.timestamp, self.git_hash, self.name, self.durations
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
/// Runs the given benchmark on the device and returns its result together with run metadata (timestamp, git hash and benchmark name).
|
||||
pub fn run_benchmark<BM>(benchmark: BM)
|
||||
pub fn run_benchmark<BM>(benchmark: BM) -> BenchmarkResult
|
||||
where
|
||||
BM: Benchmark,
|
||||
{
|
||||
|
@ -130,12 +159,14 @@ where
|
|||
.unwrap()
|
||||
.as_millis();
|
||||
let output = std::process::Command::new("git")
|
||||
.args(["rev-porse", "HEAD"])
|
||||
.args(["rev-parse", "HEAD"])
|
||||
.output()
|
||||
.unwrap();
|
||||
let git_hash = String::from_utf8(output.stdout).unwrap();
|
||||
|
||||
println!("Timestamp: {}", timestamp);
|
||||
println!("Git Hash: {}", str::trim(&git_hash));
|
||||
println!("Benchmarking - {}{}", benchmark.name(), benchmark.run());
|
||||
let git_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
|
||||
BenchmarkResult {
|
||||
timestamp,
|
||||
git_hash,
|
||||
name: benchmark.name(),
|
||||
durations: benchmark.run(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use core::time::Duration;
|
|||
use alloc::boxed::Box;
|
||||
use alloc::string::ToString;
|
||||
use alloc::vec::Vec;
|
||||
use burn_common::benchmark::{Benchmark, BenchmarkResult};
|
||||
use burn_common::benchmark::{Benchmark, BenchmarkDurations};
|
||||
|
||||
use crate::channel::ComputeChannel;
|
||||
use crate::client::ComputeClient;
|
||||
|
@ -50,7 +50,7 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
|
|||
let mut names = Vec::with_capacity(autotunables.len());
|
||||
|
||||
// Run all autotune benchmarks
|
||||
let results: Vec<BenchmarkResult> = autotunables
|
||||
let results: Vec<BenchmarkDurations> = autotunables
|
||||
.into_iter()
|
||||
.map(|op| {
|
||||
names.push(op.name().to_string());
|
||||
|
@ -78,11 +78,11 @@ impl<S: ComputeServer, C: ComputeChannel<S>> Tuner<S, C> {
|
|||
&mut self,
|
||||
operation: Box<dyn AutotuneOperation>,
|
||||
client: &ComputeClient<S, C>,
|
||||
) -> BenchmarkResult {
|
||||
) -> BenchmarkDurations {
|
||||
TuneBenchmark::new(operation, client.clone()).run()
|
||||
}
|
||||
|
||||
fn find_fastest(&self, results: Vec<BenchmarkResult>) -> usize {
|
||||
fn find_fastest(&self, results: Vec<BenchmarkDurations>) -> usize {
|
||||
let mut smallest_duration = Duration::MAX;
|
||||
let mut fastest_tunable = None;
|
||||
|
||||
|
|
Loading…
Reference in New Issue