diff --git a/Cargo.lock b/Cargo.lock index 43580b75f..b1f6143fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -199,6 +199,7 @@ dependencies = [ "arboard", "burn", "burn-common", + "burn-wgpu", "clap 4.5.2", "crossterm", "derive-new", @@ -213,6 +214,8 @@ dependencies = [ "serial_test", "strum", "strum_macros", + "sysinfo", + "wgpu", ] [[package]] @@ -2090,7 +2093,7 @@ dependencies = [ "presser", "thiserror", "winapi", - "windows", + "windows 0.51.1", ] [[package]] @@ -4394,9 +4397,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.29.11" +version = "0.30.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +checksum = "0c385888ef380a852a16209afc8cfad22795dd8873d69c9a14d2e2088f118d18" dependencies = [ "cfg-if", "core-foundation-sys", @@ -4404,7 +4407,8 @@ dependencies = [ "ntapi", "once_cell", "rayon", - "winapi", + "serde", + "windows 0.52.0", ] [[package]] @@ -5251,6 +5255,16 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core 0.52.0", + "windows-targets 0.52.3", +] + [[package]] name = "windows-core" version = "0.51.1" diff --git a/Cargo.toml b/Cargo.toml index 973cdae2a..8643b9bcd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,7 +130,7 @@ uuid = { version = "1.5.0", default-features = false } libc = "0.2.153" tch = "0.15.0" nvml-wrapper = "0.9.0" -sysinfo = "0.29.10" +sysinfo = "0.30.7" systemstat = "0.2.3" diff --git a/backend-comparison/Cargo.toml b/backend-comparison/Cargo.toml index 6f0d93c16..df64f8342 100644 --- a/backend-comparison/Cargo.toml +++ b/backend-comparison/Cargo.toml @@ -29,6 +29,7 @@ wgpu-fusion = ["wgpu", "burn/fusion"] arboard = { workspace = true } burn = { path = "../crates/burn", default-features = false } 
burn-common = { path = "../crates/burn-common", version = "0.13.0" } +burn-wgpu = { path = "../crates/burn-wgpu", version = "0.13.0" } clap = { workspace = true } crossterm = { workspace = true, optional = true } derive-new = { workspace = true } @@ -41,6 +42,8 @@ serde = { workspace = true } serde_json = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } +sysinfo = { workspace = true, features = ["serde"] } +wgpu = { workspace = true } [dev-dependencies] rstest = { workspace = true } diff --git a/backend-comparison/src/persistence/base.rs b/backend-comparison/src/persistence/base.rs index 46113062d..9ce8f1f4d 100644 --- a/backend-comparison/src/persistence/base.rs +++ b/backend-comparison/src/persistence/base.rs @@ -1,3 +1,4 @@ +use super::system_info::BenchmarkSystemInfo; use burn::{ serde::{de::Visitor, ser::SerializeStruct, Deserialize, Serialize, Serializer}, tensor::backend::Backend, @@ -9,10 +10,12 @@ use serde_json; use std::fmt::Display; use std::time::Duration; use std::{fs, io::Write}; + #[derive(Default, Clone)] pub struct BenchmarkRecord { backend: String, device: String, + system_info: BenchmarkSystemInfo, pub results: BenchmarkResult, } @@ -26,18 +29,19 @@ pub struct BenchmarkRecord { /// { /// "backend": "backend name", /// "device": "device name", -/// "git_hash": "hash", +/// "gitHash": "hash", +/// "max": "duration in microseconds", +/// "mean": "duration in microseconds", +/// "median": "duration in microseconds", +/// "min": "duration in microseconds", /// "name": "benchmark name", -/// "operation": "operation name", -/// "shapes": ["shape dimension", "shape dimension", ...], -/// "timestamp": "timestamp", /// "numSamples": "number of samples", -/// "min": "duration in seconds", -/// "max": "duration in seconds", -/// "median": "duration in seconds", -/// "mean": "duration in seconds", -/// "variance": "duration in seconds" -/// "rawDurations": ["duration 1", "duration 2", ...], +/// "operation": "operation 
name", +/// "rawDurations": [{"secs": "number of seconds", "nanos": "number of nanos"}, ...], +/// "shapes": [[shape 1], [shape 2], ...], +/// "systemInfo": { "cpus": ["cpu1", "cpu2", ...], "gpus": ["gpu1", "gpu2", ...]}, +/// "timestamp": "timestamp", +/// "variance": "duration in microseconds", /// }, /// { ... } /// ] @@ -67,6 +71,7 @@ pub fn save( .map(|bench| BenchmarkRecord { backend: B::name().to_string(), device: format!("{:?}", device), + system_info: BenchmarkSystemInfo::new(), results: bench, }) .collect(); @@ -161,6 +166,7 @@ impl Serialize for BenchmarkRecord { ("numSamples", &self.results.raw.durations.len()), ("options", &self.results.options), ("rawDurations", &self.results.raw.durations), + ("systemInfo", &self.system_info), ("shapes", &self.results.shapes), ("timestamp", &self.results.timestamp), ("variance", &self.results.computed.variance.as_micros()) @@ -202,16 +208,16 @@ impl<'de> Visitor<'de> for BenchmarkRecordVisitor { let value = map.next_value::()?; br.results.computed.min = Duration::from_micros(value); } + "numSamples" => _ = map.next_value::()?, "options" => br.results.options = map.next_value::>()?, "rawDurations" => br.results.raw.durations = map.next_value::>()?, "shapes" => br.results.shapes = map.next_value::>>()?, + "systemInfo" => br.system_info = map.next_value::()?, "timestamp" => br.results.timestamp = map.next_value::()?, "variance" => { let value = map.next_value::()?; br.results.computed.variance = Duration::from_micros(value) } - - "numSamples" => _ = map.next_value::()?, _ => panic!("Unexpected Key: {}", key), } } diff --git a/backend-comparison/src/persistence/mod.rs b/backend-comparison/src/persistence/mod.rs index 096c94ead..3b15cfca8 100644 --- a/backend-comparison/src/persistence/mod.rs +++ b/backend-comparison/src/persistence/mod.rs @@ -1,2 +1,4 @@ mod base; +mod system_info; + pub use base::*; diff --git a/backend-comparison/src/persistence/system_info.rs b/backend-comparison/src/persistence/system_info.rs new 
file mode 100644 index 000000000..6d41e1c5d --- /dev/null +++ b/backend-comparison/src/persistence/system_info.rs @@ -0,0 +1,52 @@ +use burn::serde::{Deserialize, Serialize}; +use burn_wgpu::GraphicsApi; +use std::collections::HashSet; +use sysinfo; +use wgpu; + +#[derive(Default, Clone, Serialize, Deserialize)] +pub(crate) struct BenchmarkSystemInfo { + cpus: Vec, + gpus: Vec, +} + +impl BenchmarkSystemInfo { + pub(crate) fn new() -> Self { + Self { + cpus: BenchmarkSystemInfo::enumerate_cpus(), + gpus: BenchmarkSystemInfo::enumerate_gpus(), + } + } + + fn enumerate_cpus() -> Vec { + let system = sysinfo::System::new_with_specifics( + sysinfo::RefreshKind::new().with_cpu(sysinfo::CpuRefreshKind::everything()), + ); + let cpu_names: HashSet = system + .cpus() + .iter() + .map(|c| c.brand().to_string()) + .collect(); + cpu_names.into_iter().collect() + } + + fn enumerate_gpus() -> Vec { + let instance = wgpu::Instance::default(); + let adapters: Vec = instance + .enumerate_adapters(burn_wgpu::AutoGraphicsApi::backend().into()) + .filter(|adapter| { + let info = adapter.get_info(); + info.device_type == wgpu::DeviceType::DiscreteGpu + || info.device_type == wgpu::DeviceType::IntegratedGpu + }) + .collect(); + let gpu_names: HashSet = adapters + .iter() + .map(|adapter| { + let info = adapter.get_info(); + info.name + }) + .collect(); + gpu_names.into_iter().collect() + } +} diff --git a/crates/burn-train/src/metric/cpu_use.rs b/crates/burn-train/src/metric/cpu_use.rs index 353165d28..276979308 100644 --- a/crates/burn-train/src/metric/cpu_use.rs +++ b/crates/burn-train/src/metric/cpu_use.rs @@ -1,7 +1,7 @@ use super::{MetricMetadata, Numeric}; use crate::metric::{Metric, MetricEntry}; use std::time::{Duration, Instant}; -use sysinfo::{CpuExt, CpuRefreshKind, RefreshKind, System, SystemExt}; +use sysinfo::{CpuRefreshKind, RefreshKind, System}; /// General CPU Usage metric pub struct CpuUse { diff --git a/crates/burn-train/src/metric/memory_use.rs 
b/crates/burn-train/src/metric/memory_use.rs index 832c910f6..1834e65ea 100644 --- a/crates/burn-train/src/metric/memory_use.rs +++ b/crates/burn-train/src/metric/memory_use.rs @@ -2,7 +2,7 @@ use super::{MetricMetadata, Numeric}; use crate::metric::{Metric, MetricEntry}; use std::time::{Duration, Instant}; -use sysinfo::{System, SystemExt}; +use sysinfo::System; /// Memory information pub struct CpuMemory {