mirror of https://github.com/tracel-ai/burn.git
parent a2ec774c37
commit c5c1104d75
@@ -227,10 +227,6 @@ mod tests {
     }
 
     fn create_model() -> nn::Linear<TestBackend> {
-        nn::Linear::<crate::TestBackend>::new(&nn::LinearConfig {
-            d_input: 32,
-            d_output: 32,
-            bias: true,
-        })
+        nn::Linear::<crate::TestBackend>::new(&nn::LinearConfig::new(32, 32).with_bias(true))
     }
 }
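The struct-literal construction above is replaced by the builder API that
#[derive(Config)] generates: new(...) takes the required fields and each
remaining field gets a with_* setter. A minimal sketch of the equivalent
call, assuming the same TestBackend alias these tests already use:

    // Required fields (d_input, d_output) go through new(); optional
    // fields such as bias are set via the generated with_* methods.
    let config = nn::LinearConfig::new(32, 32).with_bias(true);
    let model = nn::Linear::<TestBackend>::new(&config);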
@@ -6,9 +6,9 @@ use crate as burn;
 use crate::config::Config;
 use crate::module::Module;
 use crate::module::Param;
+use crate::nn::Initializer;
 use crate::tensor::backend::Backend;
-use crate::tensor::ElementConversion;
-use crate::tensor::{Distribution, Tensor};
+use crate::tensor::Tensor;
 use burn_tensor::module::conv1d;
 use burn_tensor::ops::conv::calculate_padding;
 
@@ -28,6 +28,9 @@ pub struct Conv1dConfig {
     /// If bias should be added to the output.
     #[config(default = true)]
     pub bias: bool,
+    /// The type of function used to initialize neural network parameters
+    #[config(default = "Initializer::UniformDefault")]
+    pub initializer: Initializer,
 }
 
 /// Padding configuration for 1D convolution [config](Conv1dConfig).
@@ -64,19 +67,17 @@ impl<B: Backend> Conv1d<B> {
         let k = (config.channels_in * config.kernel_size) as f64;
         let k = sqrt(1.0 / k);
 
-        let k1: B::FloatElem = (-k).elem();
-        let k2: B::FloatElem = k.elem();
+        let initializer = if let Initializer::UniformDefault = config.initializer {
+            Initializer::Uniform(-k, k)
+        } else {
+            config.initializer.clone()
+        };
 
-        let weight = Tensor::random(
-            [config.channels_out, config.channels_in, config.kernel_size],
-            Distribution::Uniform(k1, k2),
-        );
+        let weight =
+            initializer.init([config.channels_out, config.channels_in, config.kernel_size]);
 
         let bias = if config.bias {
-            Some(Tensor::random(
-                [config.channels_out],
-                Distribution::Uniform(k1, k2),
-            ))
+            Some(initializer.init([config.channels_out]))
         } else {
             None
         };
@@ -119,3 +120,35 @@ impl<B: Backend> Conv1d<B> {
         )
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    pub type TB = burn_ndarray::NdArrayBackend<f32>;
+
+    #[test]
+    fn initializer_default() {
+        TB::seed(0);
+        let config = Conv1dConfig::new(5, 5, 5);
+        let k = (config.channels_in * config.kernel_size) as f64;
+        let k = sqrt(1.0 / k);
+        assert_eq!(config.initializer, Initializer::UniformDefault);
+        let conv: Conv1d<TB> = Conv1d::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            if *item < -k as f32 || *item > k as f32 {
+                panic!("Element ({item}) is not within the range of (-{k},{k})");
+            }
+        }
+    }
+
+    #[test]
+    fn initializer_zeros() {
+        TB::seed(0);
+        let config = Conv1dConfig::new(5, 5, 5).with_initializer(Initializer::Zeros);
+        assert_eq!(config.initializer, Initializer::Zeros);
+        let conv: Conv1d<TB> = Conv1d::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            assert_eq!(*item, 0.0f32);
+        }
+    }
+}
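With this change, Conv1d resolves Initializer::UniformDefault into a concrete
uniform distribution bounded by k = sqrt(1 / (channels_in * kernel_size)),
while any other variant is used as-is. A hedged sketch of both paths, reusing
the TB alias from the tests and assuming the
Conv1dConfig::new(channels_in, channels_out, kernel_size) argument order:

    // Default path: UniformDefault becomes Uniform(-k, k) with
    // k = sqrt(1 / (5 * 5)) = 0.2 for this configuration.
    let conv: Conv1d<TB> = Conv1d::new(&Conv1dConfig::new(5, 5, 5));

    // Explicit override: every weight (and bias) element starts at zero.
    let conv_zeros: Conv1d<TB> =
        Conv1d::new(&Conv1dConfig::new(5, 5, 5).with_initializer(Initializer::Zeros));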
@@ -5,9 +5,9 @@ use crate as burn;
 use crate::config::Config;
 use crate::module::Module;
 use crate::module::Param;
+use crate::nn::Initializer;
 use crate::tensor::backend::Backend;
-use crate::tensor::ElementConversion;
-use crate::tensor::{Distribution, Tensor};
+use crate::tensor::Tensor;
 use burn_tensor::module::conv2d;
 use burn_tensor::ops::conv::calculate_padding;
 
@@ -26,6 +26,9 @@ pub struct Conv2dConfig {
     /// If bias should be added to the output.
     #[config(default = true)]
     pub bias: bool,
+    /// The type of function used to initialize neural network parameters
+    #[config(default = "Initializer::UniformDefault")]
+    pub initializer: Initializer,
 }
 
 /// Padding configuration for 2D convolution [config](Conv2dConfig).
@@ -64,24 +67,21 @@ impl<B: Backend> Conv2d<B> {
         let k = (config.channels[0] * config.kernel_size[0] * config.kernel_size[1]) as f64;
         let k = sqrt(1.0 / k);
 
-        let k1: B::FloatElem = (-k).elem();
-        let k2: B::FloatElem = k.elem();
+        let initializer = if let Initializer::UniformDefault = config.initializer {
+            Initializer::Uniform(-k, k)
+        } else {
+            config.initializer.clone()
+        };
 
-        let weight = Tensor::random(
-            [
-                config.channels[1],
-                config.channels[0],
-                config.kernel_size[0],
-                config.kernel_size[1],
-            ],
-            Distribution::Uniform(k1, k2),
-        );
+        let weight = initializer.init([
+            config.channels[1],
+            config.channels[0],
+            config.kernel_size[0],
+            config.kernel_size[1],
+        ]);
 
         let bias = if config.bias {
-            Some(Tensor::random(
-                [config.channels[1]],
-                Distribution::Uniform(k1, k2),
-            ))
+            Some(initializer.init([config.channels[1]]))
         } else {
             None
         };
@@ -138,3 +138,35 @@ impl Conv2dPaddingConfig {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    pub type TB = burn_ndarray::NdArrayBackend<f32>;
+
+    #[test]
+    fn initializer_default() {
+        TB::seed(0);
+        let config = Conv2dConfig::new([5, 1], [5, 5]);
+        let k = (config.channels[0] * config.kernel_size[0] * config.kernel_size[1]) as f64;
+        let k = sqrt(1.0 / k);
+        assert_eq!(config.initializer, Initializer::UniformDefault);
+        let conv: Conv2d<TB> = Conv2d::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            if *item < -k as f32 || *item > k as f32 {
+                panic!("Element ({item}) is not within the range of (-{k},{k})");
+            }
+        }
+    }
+
+    #[test]
+    fn initializer_zeros() {
+        TB::seed(0);
+        let config = Conv2dConfig::new([5, 2], [5, 5]).with_initializer(Initializer::Zeros);
+        assert_eq!(config.initializer, Initializer::Zeros);
+        let conv: Conv2d<TB> = Conv2d::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            assert_eq!(*item, 0.0f32);
+        }
+    }
+}
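Conv2d derives the same default bound from its full fan-in,
k = sqrt(1 / (channels[0] * kernel_size[0] * kernel_size[1])). For the
Conv2dConfig::new([5, 1], [5, 5]) used in the test above, that is
k = sqrt(1 / 125) ≈ 0.0894, so every default-initialized weight is expected
to land in (-0.0894, 0.0894). Note that channels is ordered [in, out]: the
weight is shaped [channels[1], channels[0], kernel_size[0], kernel_size[1]]
and the bias uses channels[1].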
@@ -3,11 +3,12 @@ use burn_tensor::Int;
 
 use crate as burn;
 
+use super::Initializer;
 use crate::config::Config;
 use crate::module::Module;
 use crate::module::Param;
 use crate::tensor::backend::Backend;
-use crate::tensor::{Distribution, Tensor};
+use crate::tensor::Tensor;
 
 /// Configuration to create an [Embedding](Embedding) layer.
 #[derive(Config)]
@@ -16,6 +17,9 @@ pub struct EmbeddingConfig {
     n_embedding: usize,
     /// The size of each vector.
     d_model: usize,
+    /// The type of function used to initialize neural network parameters
+    #[config(default = "Initializer::Normal(0.0,1.0)")]
+    pub initializer: Initializer,
 }
 
 /// Lookup table to store a fix number of vectors.
@@ -32,10 +36,9 @@ pub struct Embedding<B: Backend> {
 impl<B: Backend> Embedding<B> {
     /// Create the module from the given configuration.
     pub fn new(config: &EmbeddingConfig) -> Self {
-        let weight = Tensor::random(
-            [config.n_embedding, config.d_model],
-            Distribution::Normal(0.0, 1.0),
-        )
-        .require_grad();
+        let weight = config
+            .initializer
+            .init([config.n_embedding, config.d_model])
+            .require_grad();
 
         Self {
@@ -53,3 +56,36 @@ impl<B: Backend> Embedding<B> {
         burn_tensor::module::embedding(self.weight.val(), input)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use burn_tensor::Data;
+
+    use super::*;
+    pub type TB = burn_ndarray::NdArrayBackend<f32>;
+
+    #[test]
+    fn initializer_default() {
+        TB::seed(0);
+        let config = EmbeddingConfig::new(100, 10);
+        assert_eq!(config.initializer, Initializer::Normal(0.0, 1.0));
+        let embed: Embedding<TB> = Embedding::new(&config);
+        let weights = embed.weight.val().reshape([1000]);
+        let (var_act, mean_act) = weights.var_mean(0);
+        var_act.to_data().assert_approx_eq(&Data::from([1.0f32]), 1);
+        mean_act
+            .to_data()
+            .assert_approx_eq(&Data::from([0.0f32]), 1);
+    }
+
+    #[test]
+    fn initializer_zeros() {
+        TB::seed(0);
+        let config = EmbeddingConfig::new(5, 5).with_initializer(Initializer::Zeros);
+        assert_eq!(config.initializer, Initializer::Zeros);
+        let conv: Embedding<TB> = Embedding::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            assert_eq!(*item, 0.0f32);
+        }
+    }
+}
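Unlike the convolution and linear layers, Embedding defaults to
Initializer::Normal(0.0, 1.0), preserving the previously hard-coded
Distribution::Normal(0.0, 1.0). A sketch of overriding that default, reusing
the TB alias from the tests; the uniform bounds here are illustrative only:

    // Hypothetical override: embedding vectors drawn from U(-0.05, 0.05)
    // instead of the default N(0, 1).
    let config =
        EmbeddingConfig::new(100, 10).with_initializer(Initializer::Uniform(-0.05, 0.05));
    let embed: Embedding<TB> = Embedding::new(&config);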
@@ -0,0 +1,107 @@
+use burn_tensor::Shape;
+
+use crate::config::Config;
+use crate::tensor::backend::Backend;
+use crate::tensor::{Distribution, ElementConversion, Tensor};
+
+use crate as burn;
+
+#[derive(Config, Debug, PartialEq)]
+pub enum Initializer {
+    Uniform(f64, f64),
+    UniformDefault,
+    Normal(f64, f64),
+    Constant(f64),
+    Ones,
+    Zeros,
+    // TODO: add Xavier initialization
+}
+
+impl Initializer {
+    pub fn init<B: Backend, const D: usize, S: Into<Shape<D>>>(&self, shape: S) -> Tensor<B, D> {
+        match self {
+            Self::Uniform(a, b) => Tensor::<B, D>::random(
+                shape,
+                Distribution::Uniform((*a).elem::<B::FloatElem>(), (*b).elem::<B::FloatElem>()),
+            ),
+            Self::UniformDefault => unimplemented!("The caller should implement the default"),
+            Self::Normal(mean, std) => {
+                Tensor::<B, D>::random(shape, Distribution::Normal(*mean, *std))
+            }
+            Self::Constant(value) => Tensor::<B, D>::zeros(shape) + *value, //TODO replace with fill()
+            Self::Ones => Tensor::<B, D>::ones(shape),
+            Self::Zeros => Tensor::<B, D>::zeros(shape),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    use burn_tensor::Data;
+
+    pub type TB = burn_ndarray::NdArrayBackend<f32>;
+
+    #[test]
+    fn initializer_uniform_init() {
+        // seed random generator
+        TB::seed(0);
+        let (a, b) = (0.0, 1.0);
+        let uniform: Tensor<TB, 4> = Initializer::Uniform(a, b).init([2, 2, 2, 2]);
+        for item in uniform.to_data().value.iter() {
+            if *item < a as f32 || *item > b as f32 {
+                panic!("Element ({item}) is not within range ({a},{b})");
+            }
+        }
+    }
+
+    #[test]
+    #[should_panic]
+    fn initializer_uniform_default_init() {
+        let _: Tensor<TB, 4> = Initializer::UniformDefault.init([2, 2, 2, 2]);
+    }
+
+    #[test]
+    fn initializer_normal_init() {
+        // seed random generator
+        TB::seed(0);
+        let (mean, std) = (0.0, 1.0);
+        let normal: Tensor<TB, 1> = Initializer::Normal(mean, std).init([1000]);
+        let (var_act, mean_act) = normal.var_mean(0);
+        var_act
+            .to_data()
+            .assert_approx_eq(&Data::from([std as f32]), 1);
+        mean_act
+            .to_data()
+            .assert_approx_eq(&Data::from([mean as f32]), 1);
+    }
+
+    #[test]
+    fn initializer_constant_init() {
+        let value = 5.0;
+        let constants: Tensor<TB, 4> = Initializer::Constant(value).init([2, 2, 2, 2]);
+        constants
+            .sum()
+            .to_data()
+            .assert_approx_eq(&Data::from([value as f32 * 16.0]), 3);
+    }
+
+    #[test]
+    fn initializer_zeros_init() {
+        let zeros: Tensor<TB, 4> = Initializer::Zeros.init([2, 2, 2, 2]);
+        zeros
+            .sum()
+            .to_data()
+            .assert_approx_eq(&Data::from([0.0]), 3);
+    }
+
+    #[test]
+    fn initializer_ones_init() {
+        let ones: Tensor<TB, 4> = Initializer::Ones.init([2, 2, 2, 2]);
+        ones.sum()
+            .to_data()
+            .assert_approx_eq(&Data::from([16.0]), 3);
+    }
+}
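Initializer::init is an ordinary tensor factory, so it can be used outside
layer constructors as well. A minimal sketch, assuming the burn_ndarray TB
alias from the tests above; the numeric bounds are illustrative only:

    // The dimension count D is inferred from the shape argument; the
    // backend has to be annotated on the binding.
    let w_uniform: Tensor<TB, 2> = Initializer::Uniform(-0.1, 0.1).init([3, 4]);
    let w_normal: Tensor<TB, 2> = Initializer::Normal(0.0, 0.02).init([3, 4]);
    let w_const: Tensor<TB, 2> = Initializer::Constant(0.5).init([3, 4]);
    // UniformDefault carries no distribution of its own: layer constructors
    // (Linear, Conv1d, Conv2d) replace it with concrete bounds before calling
    // init, and init itself panics on it via unimplemented!().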
@@ -5,11 +5,12 @@ use crate as burn;
 use crate::config::Config;
 use crate::module::Module;
 use crate::module::Param;
-use crate::tensor::backend::Backend;
-use crate::tensor::{Distribution, ElementConversion, Tensor};
+use crate::tensor::{backend::Backend, Tensor};
 
 use libm::sqrt;
 
+use super::Initializer;
+
 /// Configuration to create a [Linear](Linear) layer.
 #[derive(Config)]
 pub struct LinearConfig {
@@ -20,6 +21,9 @@ pub struct LinearConfig {
     /// If a bias should be applied during the linear transformation.
     #[config(default = true)]
     pub bias: bool,
+    /// The type of function used to initialize neural network parameters
+    #[config(default = "Initializer::UniformDefault")]
+    pub initializer: Initializer,
 }
 
 /// Applies a linear transformation to the input tensor:
@@ -43,12 +47,19 @@ impl<B: Backend> Linear<B> {
     /// Create the module from the given configuration.
     pub fn new(config: &LinearConfig) -> Self {
         let k = sqrt(1.0 / config.d_input as f64);
-        let distribution = Distribution::Uniform((-1.0 * k).elem(), k.elem());
 
-        let weight = Tensor::random([config.d_input, config.d_output], distribution);
-        let bias = match config.bias {
-            true => Some(Tensor::random([config.d_output], distribution)),
-            false => None,
+        let initializer = if let Initializer::UniformDefault = config.initializer {
+            Initializer::Uniform(-k, k)
+        } else {
+            config.initializer.clone()
+        };
+
+        let weight = initializer.init([config.d_input, config.d_output]);
+
+        let bias = if config.bias {
+            Some(initializer.init([config.d_output]))
+        } else {
+            None
         };
 
         Self {
@@ -72,3 +83,35 @@ impl<B: Backend> Linear<B> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    pub type TB = burn_ndarray::NdArrayBackend<f32>;
+
+    #[test]
+    fn initializer_default() {
+        TB::seed(0);
+        let config = LinearConfig::new(5, 5);
+        let k = sqrt(1.0 / config.d_input as f64);
+
+        assert_eq!(config.initializer, Initializer::UniformDefault);
+        let conv: Linear<TB> = Linear::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            if *item < -k as f32 || *item > k as f32 {
+                panic!("Element ({item}) is not within the range of (-{k},{k})");
+            }
+        }
+    }
+
+    #[test]
+    fn initializer_zeros() {
+        TB::seed(0);
+        let config = LinearConfig::new(5, 5).with_initializer(Initializer::Zeros);
+        assert_eq!(config.initializer, Initializer::Zeros);
+        let conv: Linear<TB> = Linear::new(&config);
+        for item in conv.weight.to_data().value.iter() {
+            assert_eq!(*item, 0.0f32);
+        }
+    }
+}
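Linear resolves UniformDefault the same way, with k = sqrt(1 / d_input); for
the LinearConfig::new(5, 5) in the test above that gives k = sqrt(0.2) ≈ 0.447,
so default weights should fall in (-0.447, 0.447). Keeping UniformDefault as a
sentinel rather than a concrete distribution is deliberate: the right bound
depends on each layer's fan-in, which is only known inside the layer
constructor, so a config-level attribute default cannot carry the numbers
itself.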
@@ -8,6 +8,7 @@ pub mod transformer;
 mod dropout;
 mod embedding;
 mod gelu;
+mod initializer;
 mod linear;
 mod norm;
 mod relu;
@@ -15,6 +16,7 @@ mod relu;
 pub use dropout::*;
 pub use embedding::*;
 pub use gelu::*;
+pub use initializer::*;
 pub use linear::*;
 pub use norm::*;
 pub use relu::*;
@@ -106,11 +106,7 @@ mod tests {
     }
 
     fn layer() -> Linear<TestADBackend> {
-        Linear::<TestADBackend>::new(&LinearConfig {
-            d_input: 20,
-            d_output: 20,
-            bias: true,
-        })
+        Linear::<TestADBackend>::new(&LinearConfig::new(20, 20).with_bias(true))
     }
 
     fn random_tensor() -> Tensor<TestADBackend, 2> {
@@ -117,11 +117,7 @@ mod tests {
     }
 
     fn layer() -> Linear<TestADBackend> {
-        Linear::<TestADBackend>::new(&LinearConfig {
-            d_input: 20,
-            d_output: 20,
-            bias: true,
-        })
+        Linear::<TestADBackend>::new(&LinearConfig::new(20, 20).with_bias(true))
     }
 
     fn random_tensor() -> Tensor<TestADBackend, 2> {
@@ -170,11 +170,7 @@ mod tests {
     }
 
     fn layer() -> Linear<TestADBackend> {
-        Linear::<TestADBackend>::new(&LinearConfig {
-            d_input: 20,
-            d_output: 20,
-            bias: true,
-        })
+        Linear::<TestADBackend>::new(&LinearConfig::new(20, 20).with_bias(true))
     }
 
     fn sgd_with_all() -> Sgd<TestADBackend> {