Feat/gelu (#45)

Nathaniel Simard 2022-09-24 13:08:08 -04:00 committed by GitHub
parent a84df25d40
commit fe5ed0dbb5
6 changed files with 57 additions and 3 deletions

@ -1,4 +1,5 @@
use burn_tensor::{activation, backend, Data, Distribution, Shape, Tensor};
+use rand::{rngs::StdRng, SeedableRng};

fn loss<B: backend::Backend>(x: &Tensor<B, 2>, y: &Tensor<B, 2>) -> Tensor<B, 2> {
    let z = x.matmul(y);
@ -35,8 +36,9 @@ fn run<B: backend::Backend>(x: Data<B::Elem, 2>, y: Data<B::Elem, 2>) {
fn main() {
    // Same data for all backends
-    let x = Data::random(Shape::new([2, 3]), Distribution::Standard);
-    let y = Data::random(Shape::new([3, 1]), Distribution::Standard);
+    let mut rng = StdRng::from_entropy();
+    let x = Data::random(Shape::new([2, 3]), Distribution::Standard, &mut rng);
+    let y = Data::random(Shape::new([3, 1]), Distribution::Standard, &mut rng);

    #[cfg(feature = "ndarray")]
    {
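Note: StdRng::from_entropy() draws a fresh seed from the operating system, so every run of the example uses different random data (though still the same data for each backend within a run). A minimal sketch of how a fixed seed could be used instead for reproducible runs; the seed value is arbitrary and not part of this commit:

    let mut rng = StdRng::seed_from_u64(42); // hypothetical fixed seed for reproducibility
    let x = Data::random(Shape::new([2, 3]), Distribution::Standard, &mut rng);
    let y = Data::random(Shape::new([3, 1]), Distribution::Standard, &mut rng);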

@ -1,15 +1,27 @@
use crate::backend::Backend;
use crate::Tensor;
-use crate::{ElementPrecision, Precision};
+use crate::{ElementConversion, ElementPrecision, Precision};

/// Applies the rectified linear unit function.
pub fn relu<const D: usize, B: Backend>(tensor: &Tensor<B, D>) -> Tensor<B, D> {
    tensor.relu()
}
+/// Applies the Gaussian Error Linear Units function as described in the paper [Gaussian Error Linear Units (GELUs)](https://arxiv.org/pdf/1606.08415v3.pdf).
+pub fn gelu<const D: usize, B: Backend>(tensor: &Tensor<B, D>) -> Tensor<B, D> {
+    let x = tensor
+        .div_scalar(&2.0_f32.sqrt().to_elem())
+        .erf()
+        .add_scalar(&1.0_f32.to_elem());
+    tensor.mul(&x).mul_scalar(&0.5_f32.to_elem())
+}
/// Applies the softmax function.
pub fn softmax<const D: usize, B: Backend>(tensor: &Tensor<B, D>, dim: usize) -> Tensor<B, D> {
    log_softmax(tensor, dim).exp()
}

/// Applies the log softmax function.
pub fn log_softmax<const D: usize, B: Backend>(tensor: &Tensor<B, D>, dim: usize) -> Tensor<B, D> {
    let tensor_tmp = match B::Elem::precision() {
        Precision::Half => {
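For reference, the closed form computed by the gelu function above is the exact erf-based definition from the paper, not the tanh approximation:

    GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2)))

The div_scalar / erf / add_scalar chain builds the (1 + erf(x / sqrt(2))) factor, and the final mul / mul_scalar applies x and the 0.5 coefficient element-wise.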

@ -0,0 +1,18 @@
use super::super::TestBackend;
use burn_tensor::activation;
use burn_tensor::{Data, Tensor};

#[test]
fn test_gelu() {
    let data = Data::from([[
        0.5447, 0.9809, 0.4114, 0.1398, 0.8045, 0.4103, 0.2388, 0.5262, 0.6677, 0.6737,
    ]]);
    let tensor = Tensor::<TestBackend, 2>::from_data(data);

    let data_actual = activation::gelu(&tensor).to_data();

    let data_expected = Data::from([[
        0.3851, 0.8207, 0.2714, 0.0777, 0.6351, 0.2704, 0.1419, 0.3687, 0.4993, 0.5051,
    ]]);
    data_expected.assert_approx_eq(&data_actual, 3);
}
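As a quick sanity check on the expected values, plugging the first input into the erf-based formula gives, with rounded intermediate values:

    gelu(0.5447) = 0.5 * 0.5447 * (1 + erf(0.5447 / sqrt(2)))
                 ≈ 0.5 * 0.5447 * (1 + 0.4141)
                 ≈ 0.3851

which matches the first expected output at the 3-decimal precision checked by assert_approx_eq.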

@ -1,2 +1,3 @@
+mod gelu;
mod relu;
mod softmax;

burn/src/nn/gelu.rs (new file, 19 additions)

@ -0,0 +1,19 @@
use crate::module::Forward;
use crate::tensor::backend::Backend;
use crate::tensor::Tensor;

/// Applies the Gaussian Error Linear Units function element-wise.
#[derive(Clone, Debug, Default)]
pub struct GELU {}

impl GELU {
    pub fn new() -> Self {
        Self {}
    }
}

impl<B: Backend, const D: usize> Forward<Tensor<B, D>, Tensor<B, D>> for GELU {
    fn forward(&self, input: Tensor<B, D>) -> Tensor<B, D> {
        crate::tensor::activation::gelu(&input)
    }
}
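A minimal usage sketch of the module; here `B` is a placeholder for whichever Backend implementation is enabled, and `input` is assumed to be an existing Tensor<B, 2>:

    // GELU holds no state, so `GELU::new()` and `GELU::default()` are equivalent.
    let gelu = GELU::new();
    let output = gelu.forward(input); // element-wise GELU, same shape as the input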

@ -1,9 +1,11 @@
mod dropout;
+mod gelu;
mod layer_norm;
mod linear;
mod relu;

pub use dropout::*;
+pub use gelu::*;
pub use layer_norm::*;
pub use linear::*;
pub use relu::*;