chore(candle): Allow enabling accelerate (#1009)

* chore(candle): Allow enabling accelerate
* Temporarily disable test for accelerate feature
* Allow enabling accelerate from upstream
* Update the README
* Have xtask also test using accelerate
* Re-enable failing test
* Fix matmul on candle when using accelerate
* Add additional comment to xtask method
2023-11-30 19:03:00 +01:00 · 2023-11-30 19:03:00 +01:00 · f73136e3df
parent 1d4e91ad32
commit f73136e3df
7 changed files with 25 additions and 2 deletions
--- a/backend-comparison/Cargo.toml
+++ b/backend-comparison/Cargo.toml
@ -14,6 +14,7 @@ default = ["std"]
 std = []
 candle-cpu = ["burn/candle"]
 candle-cuda = ["burn/candle-cuda"]
+candle-accelerate = ["burn/candle-accelerate"]
 ndarray = ["burn/ndarray"]
 ndarray-blas-accelerate = ["burn/ndarray-blas-accelerate"]
 ndarray-blas-netlib = ["burn/ndarray-blas-netlib"]
--- a/burn-candle/Cargo.toml
+++ b/burn-candle/Cargo.toml
@ -12,6 +12,7 @@ version = "0.11.0"

 [features]
 cuda = ["candle-core/cuda"]
+accelerate = ["candle-core/accelerate"]

 [dependencies]
 derive-new = { workspace = true }
--- a/burn-candle/README.md
+++ b/burn-candle/README.md
@ -4,4 +4,11 @@ This crate provides a backend for [Burn](https://github.com/burn-rs/burn) based

 It is still in alpha stage, not all operations are supported. It is usable for some use cases, like for inference. 

-It can be used with CPU or CUDA. 
+It can be used with CPU or CUDA. On macOS, computations can be accelerated by using the Accelerate framework.
+
+## Feature Flags
+
+The following features are supported:
+
+- `cuda` - CUDA GPU device (NVIDIA only)
+- `accelerate` - Accelerate framework (macOS only)
--- a/burn-candle/src/ops/tensor.rs
+++ b/burn-candle/src/ops/tensor.rs
@ -137,7 +137,8 @@ impl<F: FloatCandleElement, I: IntCandleElement> TensorOps<Self> for Candle<F, I
        lhs: FloatTensor<Self, D>,
        rhs: FloatTensor<Self, D>,
    ) -> FloatTensor<Self, D> {
-        CandleTensor::new(lhs.tensor.broadcast_matmul(&rhs.tensor).unwrap())
+        let rhs_contiguous = rhs.tensor.contiguous().unwrap();
+        CandleTensor::new(lhs.tensor.broadcast_matmul(&rhs_contiguous).unwrap())
    }

    fn swap_dims<const D: usize>(
--- a/burn-core/Cargo.toml
+++ b/burn-core/Cargo.toml
@ -55,6 +55,7 @@ tch = ["burn-tch"]

 candle = ["burn-candle"]
 candle-cuda = ["candle", "burn-candle/cuda"]
+candle-accelerate = ["candle", "burn-candle/accelerate"]

 # Serialization formats
 experimental-named-tensor = ["burn-tensor/experimental-named-tensor"]
--- a/burn/Cargo.toml
+++ b/burn/Cargo.toml
@ -51,6 +51,7 @@ wgpu = ["burn-core/wgpu"]
 tch = ["burn-core/tch"]
 candle = ["burn-core/candle"]
 candle-cuda = ["burn-core/candle-cuda"]
+candle-accelerate = ["burn-core/candle-accelerate"]

 # Experimental
 experimental-named-tensor = ["burn-core/experimental-named-tensor"]
--- a/xtask/src/runchecks.rs
+++ b/xtask/src/runchecks.rs
@ -249,6 +249,13 @@ fn burn_dataset_features_std() {
    cargo_doc(["-p", "burn-dataset", "--all-features"].into());
 }

+// Runs the `burn-candle` tests with the `accelerate` feature enabled (compiled on macOS only).
+// The feature leverages the macOS Accelerate framework: https://developer.apple.com/documentation/accelerate
+#[cfg(target_os = "macos")]
+fn burn_candle_accelerate() {
+    cargo_test(["-p", "burn-candle", "--features", "accelerate"].into());
+}
+
 fn std_checks() {
    // Set RUSTDOCFLAGS environment variable to treat warnings as errors
    // for the documentation build
@ -284,6 +291,10 @@ fn std_checks() {
    // Test each workspace
    cargo_test(["--workspace"].into());

+    // Test burn-candle with accelerate (macOS only)
+    #[cfg(target_os = "macos")]
+    burn_candle_accelerate();
+
    // Test burn-dataset features
    burn_dataset_features_std();