diff --git a/backend-comparison/benches/binary.rs b/backend-comparison/benches/binary.rs
index c2dc222a6..16db7a185 100644
--- a/backend-comparison/benches/binary.rs
+++ b/backend-comparison/benches/binary.rs
@@ -36,13 +36,25 @@ impl<B: Backend> Benchmark for BinaryBenchmark<B> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     let benchmark = BinaryBenchmark::<B> {
         shape: [32, 512, 1024].into(),
         device: device.clone(),
     };
 
-    save::<B>(vec![run_benchmark(benchmark)], device, url, token).unwrap();
+    save::<B>(
+        vec![run_benchmark(benchmark)],
+        device,
+        feature_name,
+        url,
+        token,
+    )
+    .unwrap();
 }
 
 fn main() {
diff --git a/backend-comparison/benches/conv_transpose2d.rs b/backend-comparison/benches/conv_transpose2d.rs
index 976cd41a8..13ad6e1c4 100644
--- a/backend-comparison/benches/conv_transpose2d.rs
+++ b/backend-comparison/benches/conv_transpose2d.rs
@@ -54,7 +54,12 @@ impl<B: Backend> Benchmark for ConvTranspose2dBenchmark<B> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     // Shapes
     let batch_size = 16;
     let channels_in = 16;
@@ -85,7 +90,14 @@ fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>)
         device: device.clone(),
     };
 
-    save::<B>(vec![run_benchmark(benchmark)], device, url, token).unwrap();
+    save::<B>(
+        vec![run_benchmark(benchmark)],
+        device,
+        feature_name,
+        url,
+        token,
+    )
+    .unwrap();
 }
 
 fn main() {
diff --git a/backend-comparison/benches/custom_gelu.rs b/backend-comparison/benches/custom_gelu.rs
index ed6ea4949..8f42eb11e 100644
--- a/backend-comparison/benches/custom_gelu.rs
+++ b/backend-comparison/benches/custom_gelu.rs
@@ -113,7 +113,12 @@ fn erf_positive<B: Backend, const D: usize>(x: Tensor<B, D>) -> Tensor<B, D> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     const D: usize = 3;
     let shape: Shape<D> = [32, 512, 2048].into();
 
@@ -144,6 +149,7 @@ fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>)
             run_benchmark(custom_erf_gelu),
         ],
         device,
+        feature_name,
         url,
         token,
     )
diff --git a/backend-comparison/benches/data.rs b/backend-comparison/benches/data.rs
index da4da715a..afe625bdd 100644
--- a/backend-comparison/benches/data.rs
+++ b/backend-comparison/benches/data.rs
@@ -71,7 +71,12 @@ impl<B: Backend, const D: usize> Benchmark for FromDataBenchmark<B, D> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     const D: usize = 3;
     let shape: Shape<D> = [32, 512, 1024].into();
 
@@ -81,6 +86,7 @@ fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>)
     save::<B>(
         vec![run_benchmark(to_benchmark), run_benchmark(from_benchmark)],
         device,
+        feature_name,
         url,
         token,
     )
diff --git a/backend-comparison/benches/matmul.rs b/backend-comparison/benches/matmul.rs
index 38a69add1..92adfb9aa 100644
--- a/backend-comparison/benches/matmul.rs
+++ b/backend-comparison/benches/matmul.rs
@@ -42,7 +42,12 @@ impl<B: Backend, const D: usize> Benchmark for MatmulBenchmark<B, D> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     const D: usize = 3;
     let batch_size = 3;
     let m = 1024;
@@ -53,7 +58,14 @@ fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>)
 
     let benchmark = MatmulBenchmark::<B, D>::new(shape_lhs, shape_rhs, device.clone());
 
-    save::<B>(vec![run_benchmark(benchmark)], device, url, token).unwrap();
+    save::<B>(
+        vec![run_benchmark(benchmark)],
+        device,
+        feature_name,
+        url,
+        token,
+    )
+    .unwrap();
 }
 
 fn main() {
diff --git a/backend-comparison/benches/max_pool2d.rs b/backend-comparison/benches/max_pool2d.rs
index b05f130d0..80e2ffab2 100644
--- a/backend-comparison/benches/max_pool2d.rs
+++ b/backend-comparison/benches/max_pool2d.rs
@@ -42,7 +42,12 @@ impl<B: Backend> Benchmark for MaxPool2dBenchmark<B> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     let benchmark = MaxPool2dBenchmark::<B> {
         shape: [32, 32, 512, 512].into(),
         kernel_size: [5, 5],
@@ -52,7 +57,14 @@ fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>)
         device: device.clone(),
     };
 
-    save::<B>(vec![run_benchmark(benchmark)], device, url, token).unwrap();
+    save::<B>(
+        vec![run_benchmark(benchmark)],
+        device,
+        feature_name,
+        url,
+        token,
+    )
+    .unwrap();
 }
 
 fn main() {
diff --git a/backend-comparison/benches/unary.rs b/backend-comparison/benches/unary.rs
index f11168353..98fa89c85 100644
--- a/backend-comparison/benches/unary.rs
+++ b/backend-comparison/benches/unary.rs
@@ -35,13 +35,25 @@ impl<B: Backend, const D: usize> Benchmark for UnaryBenchmark<B, D> {
 }
 
 #[allow(dead_code)]
-fn bench<B: Backend>(device: &B::Device, url: Option<&str>, token: Option<&str>) {
+fn bench<B: Backend>(
+    device: &B::Device,
+    feature_name: &str,
+    url: Option<&str>,
+    token: Option<&str>,
+) {
     const D: usize = 3;
     let shape: Shape<D> = [32, 512, 1024].into();
 
     let benchmark = UnaryBenchmark::<B, D>::new(shape, device.clone());
 
-    save::<B>(vec![run_benchmark(benchmark)], device, url, token).unwrap();
+    save::<B>(
+        vec![run_benchmark(benchmark)],
+        device,
+        feature_name,
+        url,
+        token,
+    )
+    .unwrap();
 }
 
 fn main() {
diff --git a/backend-comparison/src/lib.rs b/backend-comparison/src/lib.rs
index b2fd01344..e661a3452 100644
--- a/backend-comparison/src/lib.rs
+++ b/backend-comparison/src/lib.rs
@@ -33,12 +33,41 @@ macro_rules! bench_on_backend {
         let args: Vec<String> = env::args().collect();
         let url = backend_comparison::get_sharing_url(&args);
         let token = backend_comparison::get_sharing_token(&args);
+        #[cfg(feature = "candle-accelerate")]
+        let feature_name = "candle-accelerate";
+        #[cfg(feature = "candle-cpu")]
+        let feature_name = "candle-cpu";
+        #[cfg(feature = "candle-cuda")]
+        let feature_name = "candle-cuda";
+        #[cfg(feature = "candle-metal")]
+        let feature_name = "candle-metal";
+        #[cfg(feature = "ndarray")]
+        let feature_name = "ndarray";
+        #[cfg(feature = "ndarray-blas-accelerate")]
+        let feature_name = "ndarray-blas-accelerate";
+        #[cfg(feature = "ndarray-blas-netlib")]
+        let feature_name = "ndarray-blas-netlib";
+        #[cfg(feature = "ndarray-blas-openblas")]
+        let feature_name = "ndarray-blas-openblas";
+        #[cfg(feature = "tch-cpu")]
+        let feature_name = "tch-cpu";
+        #[cfg(feature = "tch-gpu")]
+        let feature_name = "tch-gpu";
+        #[cfg(feature = "wgpu")]
+        let feature_name = "wgpu";
+        #[cfg(feature = "wgpu-fusion")]
+        let feature_name = "wgpu-fusion";
 
         #[cfg(feature = "wgpu")]
         {
             use burn::backend::wgpu::{AutoGraphicsApi, Wgpu, WgpuDevice};
 
-            bench::<Wgpu<AutoGraphicsApi, f32, i32>>(&WgpuDevice::default(), url, token);
+            bench::<Wgpu<AutoGraphicsApi, f32, i32>>(
+                &WgpuDevice::default(),
+                feature_name,
+                url,
+                token,
+            );
         }
 
         #[cfg(feature = "tch-gpu")]
@@ -49,7 +78,7 @@ macro_rules! bench_on_backend {
             let device = LibTorchDevice::Cuda(0);
             #[cfg(target_os = "macos")]
             let device = LibTorchDevice::Mps;
-            bench::<LibTorch>(&device, url, token);
+            bench::<LibTorch>(&device, feature_name, url, token);
         }
 
         #[cfg(feature = "tch-cpu")]
@@ -57,7 +86,7 @@ macro_rules! bench_on_backend {
             use burn::backend::{libtorch::LibTorchDevice, LibTorch};
 
             let device = LibTorchDevice::Cpu;
-            bench::<LibTorch>(&device, url, token);
+            bench::<LibTorch>(&device, feature_name, url, token);
         }
 
         #[cfg(any(
@@ -71,7 +100,7 @@ macro_rules! bench_on_backend {
            use burn::backend::NdArray;
 
            let device = NdArrayDevice::Cpu;
-            bench::<NdArray>(&device, url, token);
+            bench::<NdArray>(&device, feature_name, url, token);
         }
 
         #[cfg(feature = "candle-cpu")]
@@ -80,7 +109,7 @@ macro_rules! bench_on_backend {
            use burn::backend::Candle;
 
            let device = CandleDevice::Cpu;
-            bench::<Candle>(&device, url, token);
+            bench::<Candle>(&device, feature_name, url, token);
         }
 
         #[cfg(feature = "candle-cuda")]
@@ -89,7 +118,7 @@ macro_rules! bench_on_backend {
            use burn::backend::Candle;
 
            let device = CandleDevice::Cuda(0);
-            bench::<Candle>(&device, url, token);
+            bench::<Candle>(&device, feature_name, url, token);
         }
 
         #[cfg(feature = "candle-metal")]
@@ -98,7 +127,7 @@ macro_rules! bench_on_backend {
            use burn::backend::Candle;
 
            let device = CandleDevice::Metal(0);
-            bench::<Candle>(&device, url, token);
+            bench::<Candle>(&device, feature_name, url, token);
         }
     };
 }
diff --git a/backend-comparison/src/persistence/base.rs b/backend-comparison/src/persistence/base.rs
index a85f7095d..7ca683ff0 100644
--- a/backend-comparison/src/persistence/base.rs
+++ b/backend-comparison/src/persistence/base.rs
@@ -15,6 +15,7 @@ use std::{fs, io::Write};
 pub struct BenchmarkRecord {
     backend: String,
     device: String,
+    feature: String,
     system_info: BenchmarkSystemInfo,
     pub results: BenchmarkResult,
 }
@@ -29,6 +30,7 @@ pub struct BenchmarkRecord {
 /// {
 ///   "backend": "backend name",
 ///   "device": "device name",
+///   "feature": "feature name",
 ///   "gitHash": "hash",
 ///   "max": "duration in microseconds",
 ///   "mean": "duration in microseconds",
@@ -49,6 +51,7 @@ pub struct BenchmarkRecord {
 pub fn save<B: Backend>(
     benches: Vec<BenchmarkResult>,
     device: &B::Device,
+    feature: &str,
     url: Option<&str>,
     token: Option<&str>,
 ) -> Result<Vec<BenchmarkRecord>, std::io::Error> {
@@ -71,6 +74,7 @@ pub fn save<B: Backend>(
         .map(|bench| BenchmarkRecord {
             backend: B::name().to_string(),
             device: format!("{:?}", device),
+            feature: feature.to_string(),
             system_info: BenchmarkSystemInfo::new(),
             results: bench,
         })
@@ -157,6 +161,7 @@ impl Serialize for BenchmarkRecord {
             self,
             ("backend", &self.backend),
             ("device", &self.device),
+            ("feature", &self.feature),
             ("gitHash", &self.results.git_hash),
             ("max", &self.results.computed.max.as_micros()),
             ("mean", &self.results.computed.mean.as_micros()),
@@ -190,6 +195,7 @@ impl<'de> Visitor<'de> for BenchmarkRecordVisitor {
             match key.as_str() {
                 "backend" => br.backend = map.next_value::<String>()?,
                 "device" => br.device = map.next_value::<String>()?,
+                "feature" => br.feature = map.next_value::<String>()?,
                 "gitHash" => br.results.git_hash = map.next_value::<String>()?,
                 "name" => br.results.name = map.next_value::<String>()?,
                 "max" => {
@@ -243,30 +249,40 @@ pub(crate) struct BenchmarkCollection {
 impl Display for BenchmarkCollection {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         // Compute the max length for each column
-        let mut max_name_len = 0;
-        let mut max_backend_len = 0;
+        let mut max_name_len = "Benchmark".len();
+        let mut max_backend_len = "Backend".len();
+        let mut max_device_len = "Device".len();
+        let mut max_feature_len = "Feature".len();
         for record in self.records.iter() {
-            let backend_name = [record.backend.clone(), record.device.clone()].join("-");
             max_name_len = max_name_len.max(record.results.name.len());
-            max_backend_len = max_backend_len.max(backend_name.len());
+            max_backend_len = max_backend_len.max(record.backend.len());
+            max_device_len = max_device_len.max(record.device.len());
+            max_feature_len = max_feature_len.max(record.feature.len());
         }
         // Header
         writeln!(
             f,
-            "| {:<width_name$} | {:<width_backend$} | Median         |\n|{:->width_name$}--|{:->width_backend$}--|----------------|",
-            "Benchmark", "Backend", "", "", width_name = max_name_len, width_backend = max_backend_len
+            "| {:<width_name$} | {:<width_feature$} | {:<width_backend$} | {:<width_device$} | Median         |\n|{:->width_name$}--|{:->width_feature$}--|{:->width_backend$}--|{:->width_device$}--|----------------|",
+            "Benchmark", "Feature", "Backend", "Device", "", "", "", "",
+            width_name = max_name_len,
+            width_feature = max_feature_len,
+            width_backend = max_backend_len,
+            width_device = max_device_len
         )?;
         // Table entries
         for record in self.records.iter() {
-            let backend_name = [record.backend.clone(), record.device.clone()].join("-");
             writeln!(
                 f,
-                "| {:<width_name$} | {:<width_backend$} | {:<15.3?} |",
-                record.results.name,
-                backend_name,
-                record.results.computed.median,
-                width_name = max_name_len,
-                width_backend = max_backend_len
+                "| {:<width_name$} | {:<width_feature$} | {:<width_backend$} | {:<width_device$} | {:<15.3?} |",
+                record.results.name,
+                record.feature,
+                record.backend,
+                record.device,
+                record.results.computed.median,
+                width_name = max_name_len,
+                width_feature = max_feature_len,
+                width_backend = max_backend_len,
+                width_device = max_device_len
             )?;
         }
         Ok(())
@@ -295,6 +311,7 @@ mod tests {
         let record = serde_json::from_str::<BenchmarkRecord>(sample_result).unwrap();
         assert!(record.backend == "candle");
         assert!(record.device == "Cuda(0)");
+        assert!(record.feature == "wgpu-fusion");
         assert!(record.results.git_hash == "02d37011ab4dc773286e5983c09cde61f95ba4b5");
         assert!(record.results.name == "unary");
         assert!(record.results.computed.max.as_micros() == 8858);
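
Illustrative sketch (not part of the patch): the mechanism the diff relies on is that mutually exclusive Cargo features are resolved at compile time into a single `feature_name` string, which `bench` forwards to `save::<B>` and which ends up as the `feature` field of each `BenchmarkRecord`. The standalone Rust sketch below shows the same pattern in isolation; the feature names `backend-a`/`backend-b`, the `record` helper, and the `unknown` fallback are hypothetical and exist only to keep the example self-contained, whereas the real `bench_on_backend!` macro enumerates the crate's actual backend features and has no fallback arm.

// Hypothetical stand-in for save::<B>: it only shows the value that would be
// persisted in the `feature` field of BenchmarkRecord.
fn record(feature: &str) {
    println!("feature = {feature}");
}

fn main() {
    // Exactly one backend feature is expected to be enabled for a benchmark
    // build, so exactly one of these `let` bindings survives cfg expansion.
    #[cfg(feature = "backend-a")]
    let feature_name = "backend-a";
    #[cfg(feature = "backend-b")]
    let feature_name = "backend-b";
    // Fallback so this standalone sketch still compiles with no features
    // enabled; the real bench_on_backend! macro has no such arm.
    #[cfg(not(any(feature = "backend-a", feature = "backend-b")))]
    let feature_name = "unknown";

    record(feature_name);
}

Because the cfg attributes are evaluated when the benchmark binary is compiled, each run reports exactly the feature it was built with, which is what the new Feature column in the `BenchmarkCollection` table and the `feature` key in the shared JSON expose.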