feat: basic setup

nathaniel 2022-08-20 20:20:56 -04:00
parent 0f38096abe
commit 39be42b2c6
5 changed files with 184 additions and 9 deletions

@@ -16,6 +16,7 @@ tch = ["burn-tensor/tch"]
ndarray = ["burn-tensor/ndarray"]

[dependencies]
num-traits = "0.2"
burn-tensor = { path = "./burn-tensor", version = "0.1.0", default-features = false }
burn-derive = { path = "./burn-derive", version = "0.1.0" }
rand = "0.8"

examples/mnist.rs Normal file

@@ -0,0 +1,113 @@
use burn::module::{Forward, Module, Param};
use burn::nn;
use burn::optim::SGDOptimizer;
use burn::tensor::af::relu;
use burn::tensor::back::{ad, Backend};
use burn::tensor::losses::cross_entropy_with_logits;
use burn::tensor::{Distribution, Shape, Tensor};

#[derive(Module, Debug)]
struct Model<B>
where
    B: Backend,
{
    mlp: Param<MLP<B>>,
    input: Param<nn::Linear<B>>,
    output: Param<nn::Linear<B>>,
}

#[derive(Module, Debug)]
struct MLP<B>
where
    B: Backend,
{
    linears: Param<Vec<nn::Linear<B>>>,
}

impl<B: Backend> Forward<Tensor<B, 2>, Tensor<B, 2>> for MLP<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let mut x = input;

        for linear in self.linears.iter() {
            x = linear.forward(x);
            x = relu(&x);
        }

        x
    }
}

impl<B: Backend> Forward<Tensor<B, 2>, Tensor<B, 2>> for Model<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let mut x = input;

        x = self.input.forward(x);
        x = self.mlp.forward(x);
        x = self.output.forward(x);

        x
    }
}

impl<B: Backend> MLP<B> {
    fn new(dim: usize, num_layers: usize) -> Self {
        let mut linears = Vec::with_capacity(num_layers);

        for _ in 0..num_layers {
            let config = nn::LinearConfig {
                d_input: dim,
                d_output: dim,
                bias: true,
            };
            let linear = nn::Linear::new(&config);
            linears.push(linear);
        }

        Self {
            linears: Param::new(linears),
        }
    }
}

impl<B: Backend> Model<B> {
    fn new(d_input: usize, d_hidden: usize, num_layers: usize, num_classes: usize) -> Self {
        let mlp = MLP::new(d_hidden, num_layers);
        let config_input = nn::LinearConfig {
            d_input,
            d_output: d_hidden,
            bias: true,
        };
        let config_output = nn::LinearConfig {
            d_input: d_hidden,
            d_output: num_classes,
            bias: true,
        };
        let output = nn::Linear::new(&config_output);
        let input = nn::Linear::new(&config_input);

        Self {
            mlp: Param::new(mlp),
            output: Param::new(output),
            input: Param::new(input),
        }
    }
}

fn run<B: ad::Backend>() {
    let mut model: Model<B> = Model::new(768, 256, 2, 10);
    let mut optim: SGDOptimizer<B> = SGDOptimizer::new(5.0e-4);

    let input = Tensor::<B, 2>::random(Shape::new([32, 768]), Distribution::Standard);
    let targets = Tensor::<B, 2>::random(Shape::new([32, 10]), Distribution::Standard);

    let output = model.forward(input);
    let loss = cross_entropy_with_logits(&output, &targets);
    let grads = loss.backward();

    model.update_params(&grads, &mut optim);

    println!("loss {:?}", loss.to_data());
}

fn main() {
    run::<ad::Tch<f32>>();
}
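
The example file above performs a single optimization step. As a minimal illustrative sketch (not part of this commit), the same calls could be repeated to take several steps; run_steps is a hypothetical helper and the random batches stand in for real MNIST data:

fn run_steps<B: ad::Backend>(num_steps: usize) {
    let mut model: Model<B> = Model::new(768, 256, 2, 10);
    let mut optim: SGDOptimizer<B> = SGDOptimizer::new(5.0e-4);

    for step in 0..num_steps {
        // Fresh random batch each step; a real example would feed MNIST batches here.
        let input = Tensor::<B, 2>::random(Shape::new([32, 768]), Distribution::Standard);
        let targets = Tensor::<B, 2>::random(Shape::new([32, 10]), Distribution::Standard);

        // Forward pass, loss, backward pass, then parameter update via the optimizer.
        let output = model.forward(input);
        let loss = cross_entropy_with_logits(&output, &targets);
        let grads = loss.backward();
        model.update_params(&grads, &mut optim);

        println!("step {} loss {:?}", step, loss.to_data());
    }
}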

@@ -158,3 +158,53 @@ impl<M: Module> Param<M> {
        self.value.load_from_parent(name, state);
    }
}

impl<M: Module> Param<Vec<M>> {
    pub fn num_params(&self) -> usize {
        let mut num_params = 0;
        for module in self.value.iter() {
            num_params += module.num_params();
        }
        num_params
    }

    pub fn update_params<O: Optimizer<M::Backend>>(&mut self, grads: &Gradients, optim: &mut O)
    where
        M::Backend: back::ad::Backend,
    {
        for module in self.value.iter_mut() {
            module.update_params(grads, optim);
        }
    }

    pub fn devices(&self) -> Vec<<M::Backend as back::Backend>::Device> {
        let mut devices = Vec::new();
        for module in self.value.iter() {
            devices.append(&mut module.devices());
        }
        devices
    }

    pub fn to_device(&mut self, device: <M::Backend as back::Backend>::Device) {
        for module in self.value.iter_mut() {
            module.to_device(device);
        }
    }

    pub fn state(&self, name: &str) -> State<M::Backend>
    where
        <M::Backend as back::Backend>::Elem: Serialize,
        <M::Backend as back::Backend>::Elem: DeserializeOwned,
    {
        todo!();
    }

    pub fn load_from_parent(&mut self, name: &str, state: &State<M::Backend>)
    where
        <M::Backend as back::Backend>::Elem: Serialize,
        <M::Backend as back::Backend>::Elem: DeserializeOwned,
    {
        todo!();
    }
}

@@ -4,12 +4,13 @@ use crate::module::Module;
use crate::module::{Forward, Param};
use crate::tensor::back::Backend;
use crate::tensor::{Distribution, Shape, Tensor};
+use num_traits::FromPrimitive;
use std::ops::Deref;

pub struct LinearConfig {
-    d_input: usize,
-    d_output: usize,
-    bias: bool,
+    pub d_input: usize,
+    pub d_output: usize,
+    pub bias: bool,
}
@@ -23,10 +24,12 @@ where
impl<B: Backend> Linear<B> {
    pub fn new(config: &LinearConfig) -> Self {
-        let weight = Tensor::random(
-            Shape::new([config.d_input, config.d_output]),
-            Distribution::Standard,
+        // Glorot init
+        let distribution = Distribution::Uniform(
+            <B as Backend>::Elem::from_f64(-1.0 / f64::sqrt(config.d_input as f64)).unwrap(),
+            <B as Backend>::Elem::from_f64(1.0 / f64::sqrt(config.d_input as f64)).unwrap(),
        );
+        let weight = Tensor::random(Shape::new([config.d_input, config.d_output]), distribution);
        let bias = match config.bias {
            true => Some(Tensor::zeros(Shape::new([config.d_output]))),
            false => None,
@@ -39,9 +42,9 @@ impl<B: Backend> Linear<B> {
    }
}

-impl<B: Backend, const D: usize> Forward<&Tensor<B, D>, Tensor<B, D>> for Linear<B> {
-    fn forward(&self, input: &Tensor<B, D>) -> Tensor<B, D> {
-        let output = self.weight.unsqueeze().matmul(input);
+impl<B: Backend, const D: usize> Forward<Tensor<B, D>, Tensor<B, D>> for Linear<B> {
+    fn forward(&self, input: Tensor<B, D>) -> Tensor<B, D> {
+        let output = input.matmul(&self.weight.unsqueeze());
        match self.bias.deref() {
            Some(bias) => output + bias.unsqueeze(),
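
Note on the initialization above: the uniform bounds are -1/sqrt(d_input) to 1/sqrt(d_input), which for the example's layers comes to roughly ±0.036 when d_input = 768 and ±0.0625 when d_input = 256. This is the common fan-in-only uniform scheme; the Glorot/Xavier formula named in the comment would instead use bounds of ±sqrt(6 / (d_input + d_output)).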

@@ -2,11 +2,19 @@ use crate::optim::Optimizer;
use crate::tensor::back;
use crate::tensor::Gradients;
use crate::tensor::Tensor;
use num_traits::cast::FromPrimitive;

pub struct SGDOptimizer<B: back::ad::Backend> {
    learning_rate: <B::InnerBackend as back::Backend>::Elem,
}

impl<B: back::ad::Backend> SGDOptimizer<B> {
    pub fn new(learning_rate: f64) -> Self {
        let learning_rate =
            <B::InnerBackend as back::Backend>::Elem::from_f64(learning_rate).unwrap();
        Self { learning_rate }
    }
}

impl<B: back::ad::Backend> Optimizer<B> for SGDOptimizer<B> {
    fn update<const D: usize>(&mut self, tensor: &mut Tensor<B, D>, grads: &Gradients) {
        let grad = tensor.grad(&grads).unwrap();