mirror of https://github.com/tracel-ai/burn.git
Explicit device tensors (#1081)
This commit is contained in:
parent
7c6f017c98
commit
1fd07fcb4a
|
@ -23,8 +23,8 @@ impl<B: Backend, const D: usize> Benchmark for BinaryBenchmark<B, D> {
|
|||
}
|
||||
|
||||
fn prepare(&self) -> Self::Args {
|
||||
let lhs = Tensor::random_device(self.shape.clone(), Distribution::Default, &self.device);
|
||||
let rhs = Tensor::random_device(self.shape.clone(), Distribution::Default, &self.device);
|
||||
let lhs = Tensor::random(self.shape.clone(), Distribution::Default, &self.device);
|
||||
let rhs = Tensor::random(self.shape.clone(), Distribution::Default, &self.device);
|
||||
|
||||
(lhs, rhs)
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ impl<B: Backend, const D: usize> Benchmark for CustomGeluBenchmark<B, D> {
|
|||
}
|
||||
|
||||
fn prepare(&self) -> Self::Args {
|
||||
Tensor::random_device(self.shape.clone(), Distribution::Default, &self.device)
|
||||
Tensor::random(self.shape.clone(), Distribution::Default, &self.device)
|
||||
}
|
||||
|
||||
fn sync(&self) {
|
||||
|
|
|
@ -24,7 +24,7 @@ impl<B: Backend, const D: usize> Benchmark for ToDataBenchmark<B, D> {
|
|||
}
|
||||
|
||||
fn prepare(&self) -> Self::Args {
|
||||
Tensor::random_device(self.shape.clone(), Distribution::Default, &self.device)
|
||||
Tensor::random(self.shape.clone(), Distribution::Default, &self.device)
|
||||
}
|
||||
|
||||
fn sync(&self) {
|
||||
|
@ -48,7 +48,7 @@ impl<B: Backend, const D: usize> Benchmark for FromDataBenchmark<B, D> {
|
|||
|
||||
fn execute(&self, (data, device): Self::Args) {
|
||||
for _ in 0..self.num_repeats {
|
||||
let _data = Tensor::<B, D>::from_data_device(data.clone(), &device);
|
||||
let _data = Tensor::<B, D>::from_data(data.clone(), &device);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,10 +32,8 @@ impl<B: Backend, const D: usize> Benchmark for MatmulBenchmark<B, D> {
|
|||
}
|
||||
|
||||
fn prepare(&self) -> Self::Args {
|
||||
let lhs =
|
||||
Tensor::random_device(self.shape_lhs.clone(), Distribution::Default, &self.device);
|
||||
let rhs =
|
||||
Tensor::random_device(self.shape_rhs.clone(), Distribution::Default, &self.device);
|
||||
let lhs = Tensor::random(self.shape_lhs.clone(), Distribution::Default, &self.device);
|
||||
let rhs = Tensor::random(self.shape_rhs.clone(), Distribution::Default, &self.device);
|
||||
|
||||
(lhs, rhs)
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ impl<B: Backend, const D: usize> Benchmark for UnaryBenchmark<B, D> {
|
|||
}
|
||||
|
||||
fn prepare(&self) -> Self::Args {
|
||||
Tensor::random_device(self.shape.clone(), Distribution::Default, &self.device)
|
||||
Tensor::random(self.shape.clone(), Distribution::Default, &self.device)
|
||||
}
|
||||
|
||||
fn sync(&self) {
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, -1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, -10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().abs());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -13,7 +13,7 @@ mod tests {
|
|||
output_size: 3,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[
|
||||
[0.5000, 0.8333, 0.3333, 0.8333, 0.5000],
|
||||
[0.5000, 0.8333, 0.3333, 0.8333, 0.5000],
|
||||
]]));
|
||||
|
@ -29,8 +29,8 @@ mod tests {
|
|||
impl AdaptiveAvgPool1dTestCase {
|
||||
fn assert_output(self, x_grad: TestTensor<3>) {
|
||||
let shape_x = Shape::new([self.batch_size, self.channels, self.length]);
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -15,7 +15,7 @@ mod tests {
|
|||
output_size_2: 2,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[0.2500, 0.5000, 0.2500],
|
||||
[0.4167, 0.8333, 0.4167],
|
||||
|
@ -45,8 +45,8 @@ mod tests {
|
|||
impl AdaptiveAvgPool2dTestCase {
|
||||
fn assert_output(self, x_grad: TestTensor<4>) {
|
||||
let shape_x = Shape::new([self.batch_size, self.channels, self.height, self.width]);
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -5,8 +5,8 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_add() {
|
||||
let tensor_1 = TestAutodiffTensor::from_floats([2.0, 5.0]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats([4.0, 1.0]).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_floats_devauto([2.0, 5.0]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats_devauto([4.0, 1.0]).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone() + tensor_2.clone();
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -23,7 +23,7 @@ mod tests {
|
|||
fn should_diff_add_scalar() {
|
||||
let data = Data::from([2.0, 10.0]);
|
||||
|
||||
let tensor = TestAutodiffTensor::from_data(data).require_grad();
|
||||
let tensor = TestAutodiffTensor::from_data_devauto(data).require_grad();
|
||||
let tensor_out = tensor.clone().add_scalar(5.0);
|
||||
let grads = tensor_out.backward();
|
||||
|
||||
|
@ -39,9 +39,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().add(tensor_2.clone());
|
||||
let tensor_5 = tensor_4
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.mean().unsqueeze());
|
||||
|
@ -31,8 +31,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.sum().unsqueeze());
|
||||
|
@ -54,8 +54,8 @@ mod tests {
|
|||
let data_1 = Data::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.clone().sum_dim(1);
|
||||
|
@ -78,8 +78,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.mean_dim(1).unsqueeze());
|
||||
|
@ -101,8 +101,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.sum_dim(1).unsqueeze());
|
||||
|
|
|
@ -16,7 +16,7 @@ mod tests {
|
|||
count_include_pad: true,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[[
|
||||
0.3333, 0.6667, 1.0000, 1.0000, 0.6667, 0.3333,
|
||||
]]]));
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ mod tests {
|
|||
count_include_pad: true,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[
|
||||
[0.3333, 0.6667, 0.3333, 0.6667, 0.3333, 0.3333],
|
||||
[0.3333, 0.6667, 0.3333, 0.6667, 0.3333, 0.3333],
|
||||
]]));
|
||||
|
@ -51,7 +51,7 @@ mod tests {
|
|||
count_include_pad: false,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[
|
||||
[0.5000, 0.8333, 0.3333, 0.6667, 0.3333, 0.3333],
|
||||
[0.5000, 0.8333, 0.3333, 0.6667, 0.3333, 0.3333],
|
||||
]]));
|
||||
|
@ -70,8 +70,8 @@ mod tests {
|
|||
impl AvgPool1dTestCase {
|
||||
fn assert_output(self, x_grad: TestTensor<3>) {
|
||||
let shape_x = Shape::new([self.batch_size, self.channels, self.length]);
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -20,7 +20,7 @@ mod tests {
|
|||
count_include_pad: true,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[[
|
||||
[0.1111, 0.2222, 0.3333, 0.3333, 0.2222, 0.1111],
|
||||
[0.2222, 0.4444, 0.6667, 0.6667, 0.4444, 0.2222],
|
||||
[0.3333, 0.6667, 1.0000, 1.0000, 0.6667, 0.3333],
|
||||
|
@ -46,7 +46,7 @@ mod tests {
|
|||
count_include_pad: true,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[[
|
||||
[0.3333, 0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
|
||||
[0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
|
||||
[0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
|
||||
|
@ -70,7 +70,7 @@ mod tests {
|
|||
count_include_pad: false,
|
||||
};
|
||||
|
||||
test.assert_output(TestTensor::from_floats([[[
|
||||
test.assert_output(TestTensor::from_floats_devauto([[[
|
||||
[0.6250, 0.6250, 0.4167, 0.4167, 0.6250, 0.6250],
|
||||
[0.8750, 0.8750, 0.5833, 0.5833, 0.8750, 0.8750],
|
||||
[0.8750, 0.8750, 0.5833, 0.5833, 0.8750, 0.8750],
|
||||
|
@ -95,8 +95,8 @@ mod tests {
|
|||
impl AvgPool2dTestCase {
|
||||
fn assert_output(self, x_grad: TestTensor<4>) {
|
||||
let shape_x = Shape::new([self.batch_size, self.channels, self.height, self.width]);
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -11,9 +11,9 @@ mod tests {
|
|||
[[1.0, 2.0], [4.0, 5.0], [3.0, 4.0]],
|
||||
[[4.0, 5.0], [8.0, 5.0], [1.0, 9.0]],
|
||||
]);
|
||||
let weights = Tensor::<TestAutodiffBackend, 2>::from_data(weights).require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 2, Int>::from_data(indices);
|
||||
let x = Tensor::<TestAutodiffBackend, 3>::from_data(x).require_grad();
|
||||
let weights = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(weights).require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 2, Int>::from_data_devauto(indices);
|
||||
let x = Tensor::<TestAutodiffBackend, 3>::from_data_devauto(x).require_grad();
|
||||
|
||||
let output = embedding(weights.clone(), indices);
|
||||
let output = output.matmul(x);
|
||||
|
|
|
@ -37,8 +37,8 @@ mod tests {
|
|||
where
|
||||
F: Fn(TestAutodiffTensor<3>, TestAutodiffTensor<3>) -> TestAutodiffTensor<3>,
|
||||
{
|
||||
let w = TestAutodiffTensor::zeros([16, 5, 5]).require_grad();
|
||||
let x = TestAutodiffTensor::zeros([4, 5, 5]).require_grad();
|
||||
let w = TestAutodiffTensor::zeros_devauto([16, 5, 5]).require_grad();
|
||||
let x = TestAutodiffTensor::zeros_devauto([4, 5, 5]).require_grad();
|
||||
|
||||
// Slice isn't a broadcastable operation, so it will fail when the previous backward pass
|
||||
// of an operation that support broadcast doesn't support it during the backward pass.
|
||||
|
|
|
@ -5,8 +5,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_cat() {
|
||||
let tensor_1 = TestAutodiffTensor::from_data([[2.0, -1.0], [5.0, 2.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data([[5.0, 4.0], [-1.0, 4.0]]).require_grad();
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data_devauto([[2.0, -1.0], [5.0, 2.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_data_devauto([[5.0, 4.0], [-1.0, 4.0]]).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -57,9 +59,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_cat_more_than_1_dim() {
|
||||
let tensor_1 = TestAutodiffTensor::from_data([[2.0, -1.0], [5.0, 2.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_data([[5.0, 4.0], [-1.0, 4.0], [4.0, 1.0]]).require_grad();
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data_devauto([[2.0, -1.0], [5.0, 2.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto([[5.0, 4.0], [-1.0, 4.0], [4.0, 1.0]])
|
||||
.require_grad();
|
||||
|
||||
// Concat a tensor [2, 2] with another tensor [3, 2] along dim 0.
|
||||
// The resulting tensor should be [5, 2]
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [13.0, -3.0]]);
|
||||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.matmul(tensor_1.clone());
|
||||
|
@ -35,8 +35,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [13.0, -3.0]]);
|
||||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.matmul(tensor_1.clone());
|
||||
|
@ -59,8 +59,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [13.0, -3.0]]);
|
||||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.matmul(tensor_1.clone());
|
||||
|
|
|
@ -17,15 +17,15 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[14., 24., 24., 18.], [26., 42., 42., 30.]],
|
||||
[[14., 24., 24., 18.], [26., 42., 42., 30.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[30., 44., 36.], [54., 76., 60.]],
|
||||
[[30., 44., 36.], [54., 76., 60.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([8., 8.]),
|
||||
bias: TestTensor::from_floats_devauto([8., 8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -44,16 +44,16 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[39., 63., 63., 45.], [57., 90., 90., 63.]],
|
||||
[[39., 63., 63., 45.], [57., 90., 90., 63.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[30., 44., 36.], [54., 76., 60.]],
|
||||
[[30., 44., 36.], [54., 76., 60.]],
|
||||
[[30., 44., 36.], [54., 76., 60.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([8., 8., 8.]),
|
||||
bias: TestTensor::from_floats_devauto([8., 8., 8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -72,15 +72,15 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[24., 24., 24., 24.], [42., 42., 42., 42.]],
|
||||
[[24., 24., 24., 24.], [42., 42., 42., 42.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[44., 44., 44.], [76., 76., 76.]],
|
||||
[[44., 44., 44.], [76., 76., 76.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([12., 12.]),
|
||||
bias: TestTensor::from_floats_devauto([12., 12.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -99,15 +99,15 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[8., 16., 8., 10.], [14., 28., 14., 16.]],
|
||||
[[8., 16., 8., 10.], [14., 28., 14., 16.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[10., 20., 24.], [18., 36., 40.]],
|
||||
[[10., 20., 24.], [18., 36., 40.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([4., 4.]),
|
||||
bias: TestTensor::from_floats_devauto([4., 4.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -126,15 +126,15 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[6., 8., 8., 10.], [12., 14., 14., 16.]],
|
||||
[[6., 8., 8., 10.], [12., 14., 14., 16.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[8., 22., 14.], [16., 38., 22.]],
|
||||
[[8., 22., 14.], [16., 38., 22.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([4., 4.]),
|
||||
bias: TestTensor::from_floats_devauto([4., 4.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -153,12 +153,12 @@ mod tests {
|
|||
length: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[1., 3., 3., 3.], [7., 12., 12., 9.]],
|
||||
[[1., 3., 3., 3.], [7., 12., 12., 9.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([[[30., 44., 36.]], [[54., 76., 60.]]]),
|
||||
bias: TestTensor::from_floats([8., 8.]),
|
||||
weight: TestTensor::from_floats_devauto([[[30., 44., 36.]], [[54., 76., 60.]]]),
|
||||
bias: TestTensor::from_floats_devauto([8., 8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -189,21 +189,21 @@ mod tests {
|
|||
self.channels_in / self.groups,
|
||||
self.kernel_size,
|
||||
]);
|
||||
let weight = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_weight.num_elements())
|
||||
let weight = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_weight.num_elements())
|
||||
.reshape(shape_weight)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let bias = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..self.channels_out)
|
||||
let bias = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..self.channels_out)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -22,7 +22,7 @@ mod tests {
|
|||
width: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[88., 138., 138., 96.],
|
||||
|
@ -52,7 +52,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[378., 516., 396.], [552., 752., 576.], [450., 612., 468.]],
|
||||
[[666., 900., 684.], [936., 1264., 960.], [738., 996., 756.]],
|
||||
|
@ -62,7 +62,7 @@ mod tests {
|
|||
[[666., 900., 684.], [936., 1264., 960.], [738., 996., 756.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([32., 32.]),
|
||||
bias: TestTensor::from_floats_devauto([32., 32.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ mod tests {
|
|||
width: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[240., 369., 369., 252.],
|
||||
|
@ -116,7 +116,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[378., 516., 396.], [552., 752., 576.], [450., 612., 468.]],
|
||||
[[666., 900., 684.], [936., 1264., 960.], [738., 996., 756.]],
|
||||
|
@ -130,7 +130,7 @@ mod tests {
|
|||
[[666., 900., 684.], [936., 1264., 960.], [738., 996., 756.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([32., 32., 32.]),
|
||||
bias: TestTensor::from_floats_devauto([32., 32., 32.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -154,7 +154,7 @@ mod tests {
|
|||
width: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[116., 180., 192., 132.],
|
||||
[198., 306., 324., 222.],
|
||||
|
@ -168,7 +168,7 @@ mod tests {
|
|||
[244., 372., 384., 260.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[27., 45., 54., 39.],
|
||||
|
@ -194,7 +194,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([12., 12.]),
|
||||
bias: TestTensor::from_floats_devauto([12., 12.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -218,7 +218,7 @@ mod tests {
|
|||
width: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[138., 138., 138., 138.],
|
||||
[234., 234., 234., 234.],
|
||||
|
@ -232,7 +232,7 @@ mod tests {
|
|||
[282., 282., 282., 282.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[66., 66., 66.], [120., 120., 120.], [114., 114., 114.]],
|
||||
[[258., 258., 258.], [376., 376., 376.], [306., 306., 306.]],
|
||||
|
@ -242,7 +242,7 @@ mod tests {
|
|||
[[258., 258., 258.], [376., 376., 376.], [306., 306., 306.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([24., 24.]),
|
||||
bias: TestTensor::from_floats_devauto([24., 24.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -266,7 +266,7 @@ mod tests {
|
|||
width: 5,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[88., 138., 138., 138., 96.],
|
||||
[150., 234., 234., 234., 162.],
|
||||
|
@ -280,7 +280,7 @@ mod tests {
|
|||
[184., 282., 282., 282., 192.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[78., 105., 90.], [144., 190., 160.], [138., 180., 150.]],
|
||||
[[318., 405., 330.], [464., 590., 480.], [378., 480., 390.]],
|
||||
|
@ -290,7 +290,7 @@ mod tests {
|
|||
[[318., 405., 330.], [464., 590., 480.], [378., 480., 390.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([20., 20.]),
|
||||
bias: TestTensor::from_floats_devauto([20., 20.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -314,7 +314,7 @@ mod tests {
|
|||
width: 6,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[26., 52., 26., 52., 26., 28.],
|
||||
[52., 104., 52., 104., 52., 56.],
|
||||
|
@ -332,7 +332,7 @@ mod tests {
|
|||
[50., 100., 50., 100., 50., 52.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[56., 84., 90.], [84., 126., 135.], [120., 180., 189.]],
|
||||
[[200., 300., 306.], [300., 450., 459.], [336., 504., 513.]],
|
||||
|
@ -342,7 +342,7 @@ mod tests {
|
|||
[[200., 300., 306.], [300., 450., 459.], [336., 504., 513.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([9., 9.]),
|
||||
bias: TestTensor::from_floats_devauto([9., 9.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ mod tests {
|
|||
width: 8,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[50., 78., 78., 78., 78., 78., 78., 54.],
|
||||
[62., 96., 96., 96., 96., 96., 96., 66.],
|
||||
|
@ -388,7 +388,7 @@ mod tests {
|
|||
[98., 150., 150., 150., 150., 150., 150., 102.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[434., 504., 448.], [567., 660., 588.], [735., 852., 756.]],
|
||||
[
|
||||
|
@ -406,7 +406,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([24., 24.]),
|
||||
bias: TestTensor::from_floats_devauto([24., 24.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -430,7 +430,7 @@ mod tests {
|
|||
width: 6,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[18., 38., 38., 42., 42., 22.],
|
||||
[42., 88., 88., 96., 96., 50.],
|
||||
|
@ -448,7 +448,7 @@ mod tests {
|
|||
[48., 98., 98., 102., 102., 52.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[63., 102., 90.], [192., 280., 228.], [225., 318., 252.]],
|
||||
[[387., 534., 414.], [624., 856., 660.], [549., 750., 576.]],
|
||||
|
@ -458,7 +458,7 @@ mod tests {
|
|||
[[387., 534., 414.], [624., 856., 660.], [549., 750., 576.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([16., 16.]),
|
||||
bias: TestTensor::from_floats_devauto([16., 16.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -482,7 +482,7 @@ mod tests {
|
|||
width: 6,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[18., 0., 20., 20., 0., 22.],
|
||||
[42., 0., 46., 46., 0., 50.],
|
||||
|
@ -500,7 +500,7 @@ mod tests {
|
|||
[48., 0., 50., 50., 0., 52.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[18., 51., 33.], [60., 140., 80.], [72., 159., 87.]],
|
||||
[[126., 267., 141.], [204., 428., 224.], [180., 375., 195.]],
|
||||
|
@ -510,7 +510,7 @@ mod tests {
|
|||
[[126., 267., 141.], [204., 428., 224.], [180., 375., 195.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([8., 8.]),
|
||||
bias: TestTensor::from_floats_devauto([8., 8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -534,7 +534,7 @@ mod tests {
|
|||
width: 5,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[0., 1., 3., 3., 2.],
|
||||
[3., 8., 15., 12., 7.],
|
||||
|
@ -550,11 +550,11 @@ mod tests {
|
|||
[15., 31., 48., 33., 17.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[[54., 63., 72.], [99., 108., 117.], [144., 153., 162.]]],
|
||||
[[[279., 288., 297.], [324., 333., 342.], [369., 378., 387.]]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([9., 9.]),
|
||||
bias: TestTensor::from_floats_devauto([9., 9.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -578,7 +578,7 @@ mod tests {
|
|||
width: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[9., 20., 24., 13.],
|
||||
[24., 52., 60., 32.],
|
||||
|
@ -598,7 +598,7 @@ mod tests {
|
|||
[93., 188., 192., 97.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[[10., 14., 18.], [26., 30., 34.], [42., 46., 50.]]],
|
||||
[[[10., 14., 18.], [26., 30., 34.], [42., 46., 50.]]],
|
||||
[[[74., 78., 82.], [90., 94., 98.], [106., 110., 114.]]],
|
||||
|
@ -606,7 +606,7 @@ mod tests {
|
|||
[[[138., 142., 146.], [154., 158., 162.], [170., 174., 178.]]],
|
||||
[[[138., 142., 146.], [154., 158., 162.], [170., 174., 178.]]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([4., 4., 4., 4., 4., 4.]),
|
||||
bias: TestTensor::from_floats_devauto([4., 4., 4., 4., 4., 4.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -630,7 +630,7 @@ mod tests {
|
|||
width: 5,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[36., 39., 0., 39., 42.],
|
||||
[81., 87., 0., 87., 93.],
|
||||
|
@ -644,7 +644,7 @@ mod tests {
|
|||
[63., 66., 0., 66., 69.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[15., 42., 27.], [30., 72., 42.]],
|
||||
[[75., 162., 87.], [90., 192., 102.]],
|
||||
|
@ -658,7 +658,7 @@ mod tests {
|
|||
[[75., 162., 87.], [90., 192., 102.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([8., 8., 8.]),
|
||||
bias: TestTensor::from_floats_devauto([8., 8., 8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -695,21 +695,21 @@ mod tests {
|
|||
self.kernel_size_1,
|
||||
self.kernel_size_2,
|
||||
]);
|
||||
let weight = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_weight.num_elements())
|
||||
let weight = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_weight.num_elements())
|
||||
.reshape(shape_weight)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let bias = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..self.channels_out)
|
||||
let bias = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..self.channels_out)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -17,15 +17,15 @@ mod tests {
|
|||
size: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[15.0, 15.0, 15.0, 15.0], [51.0, 51.0, 51.0, 51.0]],
|
||||
[[15.0, 15.0, 15.0, 15.0], [51.0, 51.0, 51.0, 51.0]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[44.0, 44.0, 44.0], [44.0, 44.0, 44.0]],
|
||||
[[76.0, 76.0, 76.0], [76.0, 76.0, 76.0]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([12., 12.]),
|
||||
bias: TestTensor::from_floats_devauto([12., 12.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -44,15 +44,15 @@ mod tests {
|
|||
size: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[7., 12., 8., 3.], [19., 36., 32., 15.]],
|
||||
[[7., 12., 8., 3.], [19., 36., 32., 15.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[26., 22., 18.], [26., 22., 18.]],
|
||||
[[42., 38., 34.], [42., 38., 34.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([4., 4.]),
|
||||
bias: TestTensor::from_floats_devauto([4., 4.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -71,15 +71,15 @@ mod tests {
|
|||
size: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[44., 44., 44.], [44., 44., 44.]],
|
||||
[[76., 76., 76.], [76., 76., 76.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([18., 18.]),
|
||||
bias: TestTensor::from_floats_devauto([18., 18.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -98,15 +98,15 @@ mod tests {
|
|||
size: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[44., 44., 44.], [44., 44., 44.]],
|
||||
[[76., 76., 76.], [76., 76., 76.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([20., 20.]),
|
||||
bias: TestTensor::from_floats_devauto([20., 20.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -125,15 +125,15 @@ mod tests {
|
|||
size: 4,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
[[15., 15., 15., 15.], [51., 51., 51., 51.]],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[44., 44., 44.], [44., 44., 44.]],
|
||||
[[76., 76., 76.], [76., 76., 76.]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([16., 16.]),
|
||||
bias: TestTensor::from_floats_devauto([16., 16.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -152,7 +152,7 @@ mod tests {
|
|||
size: 8,
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[12.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0],
|
||||
[36.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0],
|
||||
|
@ -162,11 +162,11 @@ mod tests {
|
|||
[36.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0, 51.0],
|
||||
],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[168.0, 184.0, 184.0], [168.0, 184.0, 184.0]],
|
||||
[[280.0, 312.0, 312.0], [280.0, 312.0, 312.0]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([36.0, 36.0, 36.0, 36.0]),
|
||||
bias: TestTensor::from_floats_devauto([36.0, 36.0, 36.0, 36.0]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -197,21 +197,21 @@ mod tests {
|
|||
self.channels[1] / self.groups,
|
||||
self.kernel_size,
|
||||
]);
|
||||
let weight = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_weight.num_elements())
|
||||
let weight = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_weight.num_elements())
|
||||
.reshape(shape_weight)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let bias = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..self.channels[1])
|
||||
let bias = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..self.channels[1])
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -17,7 +17,7 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[153., 153., 153., 153.],
|
||||
|
@ -47,7 +47,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[752., 752., 752.], [752., 752., 752.], [752., 752., 752.]],
|
||||
[[752., 752., 752.], [752., 752., 752.], [752., 752., 752.]],
|
||||
|
@ -65,7 +65,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([72., 72.]),
|
||||
bias: TestTensor::from_floats_devauto([72., 72.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -84,18 +84,18 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[[
|
||||
x: TestTensor::from_floats_devauto([[[
|
||||
[13., 24., 20., 9.],
|
||||
[15., 27., 21., 9.],
|
||||
[15., 27., 21., 9.],
|
||||
[7., 12., 8., 3.],
|
||||
]]]),
|
||||
weight: TestTensor::from_floats([[[
|
||||
weight: TestTensor::from_floats_devauto([[[
|
||||
[63., 57., 51.],
|
||||
[68., 60., 52.],
|
||||
[39., 33., 27.],
|
||||
]]]),
|
||||
bias: TestTensor::from_floats([8.]),
|
||||
bias: TestTensor::from_floats_devauto([8.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -114,18 +114,18 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[[
|
||||
x: TestTensor::from_floats_devauto([[[
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
]]]),
|
||||
weight: TestTensor::from_floats([[[
|
||||
weight: TestTensor::from_floats_devauto([[[
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
]]]),
|
||||
bias: TestTensor::from_floats([108.]),
|
||||
bias: TestTensor::from_floats_devauto([108.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -144,18 +144,18 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[[
|
||||
x: TestTensor::from_floats_devauto([[[
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
]]]),
|
||||
weight: TestTensor::from_floats([[[
|
||||
weight: TestTensor::from_floats_devauto([[[
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
]]]),
|
||||
bias: TestTensor::from_floats([140.]),
|
||||
bias: TestTensor::from_floats_devauto([140.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -174,18 +174,18 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[[
|
||||
x: TestTensor::from_floats_devauto([[[
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
]]]),
|
||||
weight: TestTensor::from_floats([[[
|
||||
weight: TestTensor::from_floats_devauto([[[
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
[120., 120., 120.],
|
||||
]]]),
|
||||
bias: TestTensor::from_floats([80.]),
|
||||
bias: TestTensor::from_floats_devauto([80.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -204,7 +204,7 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[351., 351., 351., 351.],
|
||||
[351., 351., 351., 351.],
|
||||
|
@ -218,7 +218,7 @@ mod tests {
|
|||
[1080., 1080., 1080., 1080.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[120., 120., 120.], [120., 120., 120.], [120., 120., 120.]],
|
||||
[[120., 120., 120.], [120., 120., 120.], [120., 120., 120.]],
|
||||
|
@ -230,7 +230,7 @@ mod tests {
|
|||
[[376., 376., 376.], [376., 376., 376.], [376., 376., 376.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([36., 36., 36.]),
|
||||
bias: TestTensor::from_floats_devauto([36., 36., 36.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -249,7 +249,7 @@ mod tests {
|
|||
size: [6, 6],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[[
|
||||
x: TestTensor::from_floats_devauto([[[
|
||||
[105., 105., 105., 105., 105., 105.],
|
||||
[105., 105., 105., 105., 105., 105.],
|
||||
[105., 105., 105., 105., 105., 105.],
|
||||
|
@ -257,12 +257,12 @@ mod tests {
|
|||
[105., 105., 105., 105., 105., 105.],
|
||||
[105., 105., 105., 105., 105., 105.],
|
||||
]]]),
|
||||
weight: TestTensor::from_floats([[[
|
||||
weight: TestTensor::from_floats_devauto([[[
|
||||
[630., 630., 630., 630., 630.],
|
||||
[630., 630., 630., 630., 630.],
|
||||
[630., 630., 630., 630., 630.],
|
||||
]]]),
|
||||
bias: TestTensor::from_floats([80.]),
|
||||
bias: TestTensor::from_floats_devauto([80.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -281,7 +281,7 @@ mod tests {
|
|||
size: [4, 4],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[36., 36., 36., 36.],
|
||||
[36., 36., 36., 36.],
|
||||
|
@ -295,11 +295,11 @@ mod tests {
|
|||
[117., 117., 117., 117.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[[120., 120., 120.], [120., 120., 120.], [120., 120., 120.]]],
|
||||
[[[376., 376., 376.], [376., 376., 376.], [376., 376., 376.]]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([36., 36.]),
|
||||
bias: TestTensor::from_floats_devauto([36., 36.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -318,7 +318,7 @@ mod tests {
|
|||
size: [6, 8],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([
|
||||
x: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[600., 735., 735., 735., 735., 735., 735., 735.],
|
||||
|
@ -356,7 +356,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[
|
||||
[5320., 6040., 6040., 6040., 6040.],
|
||||
|
@ -392,7 +392,7 @@ mod tests {
|
|||
],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([896., 896., 896.]),
|
||||
bias: TestTensor::from_floats_devauto([896., 896., 896.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -411,7 +411,7 @@ mod tests {
|
|||
size: [10, 10],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[30., 42., 42., 42., 42., 42., 42., 42., 42., 42.],
|
||||
[48., 66., 66., 66., 66., 66., 66., 66., 66., 66.],
|
||||
|
@ -461,7 +461,7 @@ mod tests {
|
|||
[336., 498., 498., 498., 498., 498., 498., 498., 498., 498.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[
|
||||
[[4455., 4905., 4905.], [4500., 4950., 4950.]],
|
||||
[[4455., 4905., 4905.], [4500., 4950., 4950.]],
|
||||
|
@ -479,7 +479,7 @@ mod tests {
|
|||
[[28755., 31905., 31905.], [31500., 34950., 34950.]],
|
||||
],
|
||||
]),
|
||||
bias: TestTensor::from_floats([570., 570.]),
|
||||
bias: TestTensor::from_floats_devauto([570., 570.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -498,7 +498,7 @@ mod tests {
|
|||
size: [10, 10],
|
||||
};
|
||||
let grads = Grads {
|
||||
x: TestTensor::from_floats([[
|
||||
x: TestTensor::from_floats_devauto([[
|
||||
[
|
||||
[9., 12., 12., 12., 12., 12., 12., 12., 12., 12.],
|
||||
[12., 15., 15., 15., 15., 15., 15., 15., 15., 15.],
|
||||
|
@ -548,13 +548,13 @@ mod tests {
|
|||
[84., 123., 123., 123., 123., 123., 123., 123., 123., 123.],
|
||||
],
|
||||
]]),
|
||||
weight: TestTensor::from_floats([
|
||||
weight: TestTensor::from_floats_devauto([
|
||||
[[[4455., 4905., 4905.], [4500., 4950., 4950.]]],
|
||||
[[[12555., 13905., 13905.], [13500., 14950., 14950.]]],
|
||||
[[[20655., 22905., 22905.], [22500., 24950., 24950.]]],
|
||||
[[[28755., 31905., 31905.], [31500., 34950., 34950.]]],
|
||||
]),
|
||||
bias: TestTensor::from_floats([570., 570.]),
|
||||
bias: TestTensor::from_floats_devauto([570., 570.]),
|
||||
};
|
||||
test.assert_grads(grads);
|
||||
}
|
||||
|
@ -591,21 +591,21 @@ mod tests {
|
|||
self.kernel_size[0],
|
||||
self.kernel_size[1],
|
||||
]);
|
||||
let weight = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_weight.num_elements())
|
||||
let weight = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_weight.num_elements())
|
||||
.reshape(shape_weight)
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let bias = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..self.channels[1])
|
||||
let bias = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..self.channels[1])
|
||||
.into_data()
|
||||
.convert(),
|
||||
)
|
||||
.require_grad();
|
||||
let x = TestAutodiffTensor::from_data(
|
||||
TestTensorInt::arange(0..shape_x.num_elements())
|
||||
let x = TestAutodiffTensor::from_data_devauto(
|
||||
TestTensorInt::arange_devauto(0..shape_x.num_elements())
|
||||
.reshape(shape_x)
|
||||
.into_data()
|
||||
.convert(),
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().cos());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -9,10 +9,10 @@ mod tests {
|
|||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
let data_targets = Data::from([[0.8, 0.2], [0.9, 0.1]]);
|
||||
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data(data_2).require_grad();
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_2).require_grad();
|
||||
let tensor_targets =
|
||||
Tensor::<TestAutodiffBackend, 2>::from_data(data_targets).require_grad();
|
||||
Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_targets).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = loss::cross_entropy_with_logits(tensor_3, tensor_targets);
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::from([1.0, 7.0]);
|
||||
let data_2 = Data::from([4.0, 7.0]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().div(tensor_2.clone());
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -29,7 +29,7 @@ mod tests {
|
|||
fn should_diff_div_scalar() {
|
||||
let data = Data::from([1.0, 7.0]);
|
||||
|
||||
let tensor = TestAutodiffTensor::from_data(data).require_grad();
|
||||
let tensor = TestAutodiffTensor::from_data_devauto(data).require_grad();
|
||||
let tensor_out = tensor.clone().div_scalar(4.0);
|
||||
|
||||
let grads = tensor_out.backward();
|
||||
|
@ -44,9 +44,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().div(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.div(tensor_3);
|
||||
|
@ -69,8 +69,8 @@ mod tests {
|
|||
let data_1 = Data::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.div(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().erf());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().exp());
|
||||
let grads = tensor_3.backward();
|
||||
|
|
|
@ -6,9 +6,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_gather_grad() {
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
.require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 2, Int>::from_data(Data::from([
|
||||
let indices = Tensor::<TestAutodiffBackend, 2, Int>::from_data_devauto(Data::from([
|
||||
[2, 1, 0, 1, 2],
|
||||
[1, 0, 2, 1, 0],
|
||||
]));
|
||||
|
@ -30,12 +30,15 @@ mod tests {
|
|||
#[test]
|
||||
fn test_scatter_grad() {
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
.require_grad();
|
||||
let values = TestAutodiffTensor::from_data(Data::from([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
|
||||
.require_grad();
|
||||
let indices =
|
||||
Tensor::<TestAutodiffBackend, 2, Int>::from_data(Data::from([[2, 1, 0], [2, 0, 1]]));
|
||||
let values =
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
|
||||
.require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 2, Int>::from_data_devauto(Data::from([
|
||||
[2, 1, 0],
|
||||
[2, 0, 1],
|
||||
]));
|
||||
|
||||
let tensor_2 = tensor_1.clone().matmul(tensor_1.clone().transpose());
|
||||
let tensor_3 = tensor_1.clone().scatter(1, indices, values.clone());
|
||||
|
|
|
@ -5,8 +5,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_gelu() {
|
||||
let tensor_1 = TestAutodiffTensor::from_floats([[0.0, 1.0], [-3.0, 4.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats([[6.0, -0.5], [9.0, 10.0]]).require_grad();
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_floats_devauto([[0.0, 1.0], [-3.0, 4.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_floats_devauto([[6.0, -0.5], [9.0, 10.0]]).require_grad();
|
||||
|
||||
let x = tensor_1.clone().matmul(activation::gelu(tensor_2.clone()));
|
||||
let x = tensor_1.clone().matmul(x);
|
||||
|
|
|
@ -5,8 +5,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_update_tensor_when_grad_replace() {
|
||||
let tensor_1 = TestAutodiffTensor::random([32, 32], Distribution::Default).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::random([32, 32], Distribution::Default);
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::random_devauto([32, 32], Distribution::Default).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::random_devauto([32, 32], Distribution::Default);
|
||||
|
||||
let x = tensor_1.clone().matmul(activation::gelu(tensor_2));
|
||||
let mut grads = x.backward();
|
||||
|
@ -14,7 +15,7 @@ mod tests {
|
|||
let grad_1 = tensor_1.grad(&grads).unwrap();
|
||||
|
||||
let grad_1_updated =
|
||||
TestAutodiffTensor::random([32, 32], Distribution::Default).require_grad();
|
||||
TestAutodiffTensor::random_devauto([32, 32], Distribution::Default).require_grad();
|
||||
tensor_1.grad_replace(&mut grads, grad_1_updated.clone().inner());
|
||||
|
||||
let grad_1_new = tensor_1.grad(&grads).unwrap();
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().log());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().log1p());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -9,9 +9,9 @@ mod tests {
|
|||
let data_2 = Data::<f32, 2>::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let mask = Data::<bool, 2>::from([[true, false], [false, true]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let mask = Tensor::<TestAutodiffBackend, 2, Bool>::from_bool(mask);
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let mask = Tensor::<TestAutodiffBackend, 2, Bool>::from_bool_devauto(mask);
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.mask_fill(mask, 2.0);
|
||||
|
@ -26,11 +26,16 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_mask_where() {
|
||||
let tensor_1 = TestAutodiffTensor::from_data([[1.0, 7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data([[4.0, 7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data([[8.8, 9.8], [10.8, 11.8]]).require_grad();
|
||||
let mask =
|
||||
Tensor::<TestAutodiffBackend, 2, Bool>::from_data([[true, false], [false, true]]);
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data_devauto([[1.0, 7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_data_devauto([[4.0, 7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_3 =
|
||||
TestAutodiffTensor::from_data_devauto([[8.8, 9.8], [10.8, 11.8]]).require_grad();
|
||||
let mask = Tensor::<TestAutodiffBackend, 2, Bool>::from_data_devauto([
|
||||
[true, false],
|
||||
[false, true],
|
||||
]);
|
||||
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.clone().matmul(tensor_3.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [2.0, 3.0]]);
|
||||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -31,9 +31,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.matmul(tensor_3);
|
||||
|
@ -53,9 +53,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.matmul(tensor_3.clone());
|
||||
|
|
|
@ -5,8 +5,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_max_dim() {
|
||||
let tensor_1 = TestAutodiffTensor::from_floats([[1.0, 7.0], [-2.0, -3.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats([[4.0, -7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_floats_devauto([[1.0, 7.0], [-2.0, -3.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_floats_devauto([[4.0, -7.0], [2.0, 3.0]]).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.max_dim(1).unsqueeze());
|
||||
|
@ -25,8 +27,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_min_dim() {
|
||||
let tensor_1 = TestAutodiffTensor::from_floats([[1.0, 7.0], [-2.0, -3.0]]).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats([[4.0, -7.0], [2.0, 3.0]]).require_grad();
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_floats_devauto([[1.0, 7.0], [-2.0, -3.0]]).require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_floats_devauto([[4.0, -7.0], [2.0, 3.0]]).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_3.min_dim(1).unsqueeze());
|
||||
|
|
|
@ -10,10 +10,11 @@ mod tests {
|
|||
let stride = 1;
|
||||
let dilation = 1;
|
||||
|
||||
let x =
|
||||
TestAutodiffTensor::from_floats([[[0.9861, 0.5474, 0.4477, 0.0732, 0.3548, 0.8221]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[1., 1., 0., 0., 0., 1.]]]);
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0.9861, 0.5474, 0.4477, 0.0732, 0.3548, 0.8221,
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[1., 1., 0., 0., 0., 1.]]]);
|
||||
|
||||
let output = max_pool1d(x.clone(), kernel_size, stride, padding, dilation);
|
||||
let grads = output.backward();
|
||||
|
@ -32,13 +33,13 @@ mod tests {
|
|||
let stride = 1;
|
||||
let dilation = 2;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0.5388, 0.0676, 0.7122, 0.8316, 0.0653, 0.9154, 0.1536, 0.9089, 0.8016, 0.7518, 0.2073,
|
||||
0.0501, 0.8811, 0.5604, 0.5075, 0.4384, 0.9963, 0.9698, 0.4988, 0.2609, 0.3391, 0.2230,
|
||||
0.4610, 0.5365, 0.6880,
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0., 0., 1., 0., 0., 3., 0., 1., 2., 1., 0., 0., 2., 0., 0., 0., 4., 4., 0., 0., 0., 0.,
|
||||
0., 0., 1.,
|
||||
]]]);
|
||||
|
@ -60,13 +61,13 @@ mod tests {
|
|||
let stride = 1;
|
||||
let dilation = 1;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0.5388, 0.0676, 0.7122, 0.8316, 0.0653, 0.9154, 0.1536, 0.9089, 0.8016, 0.7518, 0.2073,
|
||||
0.0501, 0.8811, 0.5604, 0.5075, 0.4384, 0.9963, 0.9698, 0.4988, 0.2609, 0.3391, 0.2230,
|
||||
0.4610, 0.5365, 0.6880,
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0., 0., 0., 2., 0., 4., 0., 2., 1., 0., 0., 0., 4., 0., 0., 0., 4., 1., 1., 0., 0., 0.,
|
||||
1., 1., 1.,
|
||||
]]]);
|
||||
|
@ -88,13 +89,13 @@ mod tests {
|
|||
let stride = 1;
|
||||
let dilation = 1;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
0.5388, 0.0676, 0.7122, 0.8316, 0.0653, 0.9154, 0.1536, 0.9089, 0.8016, 0.7518, 0.2073,
|
||||
0.0501, 0.8811, 0.5604, 0.5075, 0.4384, 0.9963, 0.9698, 0.4988, 0.2609, 0.3391, 0.2230,
|
||||
0.4610, 0.5365, 0.6880,
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
1., 0., 1., 2., 0., 4., 0., 2., 1., 0., 0., 0., 4., 0., 0., 0., 4., 1., 1., 0., 0., 0.,
|
||||
1., 1., 3.,
|
||||
]]]);
|
||||
|
|
|
@ -14,14 +14,14 @@ mod tests {
|
|||
let dilation_1 = 1;
|
||||
let dilation_2 = 1;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0.2479, 0.6386, 0.3166, 0.5742],
|
||||
[0.7065, 0.1940, 0.6305, 0.8959],
|
||||
[0.5416, 0.8602, 0.8129, 0.1662],
|
||||
[0.3358, 0.3059, 0.8293, 0.0990],
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0.0, 0.0, 0.0, 0.0],
|
||||
[0.0, 0.0, 0.0, 2.0],
|
||||
[0.0, 2.0, 0.0, 0.0],
|
||||
|
@ -55,14 +55,14 @@ mod tests {
|
|||
let dilation_1 = 1;
|
||||
let dilation_2 = 1;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0.2479, 0.6386, 0.3166, 0.5742],
|
||||
[0.7065, 0.1940, 0.6305, 0.8959],
|
||||
[0.5416, 0.8602, 0.8129, 0.1662],
|
||||
[0.3358, 0.3059, 0.8293, 0.0990],
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[1., 3., 0., 2.],
|
||||
[3., 0., 0., 4.],
|
||||
[1., 4., 0., 1.],
|
||||
|
@ -96,14 +96,14 @@ mod tests {
|
|||
let dilation_1 = 2;
|
||||
let dilation_2 = 2;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0.2479, 0.6386, 0.3166, 0.5742],
|
||||
[0.7065, 0.1940, 0.6305, 0.8959],
|
||||
[0.5416, 0.8602, 0.8129, 0.1662],
|
||||
[0.3358, 0.3059, 0.8293, 0.0990],
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0., 0., 0., 0.],
|
||||
[1., 1., 1., 2.],
|
||||
[0., 4., 4., 0.],
|
||||
|
@ -137,7 +137,7 @@ mod tests {
|
|||
let dilation_1 = 1;
|
||||
let dilation_2 = 1;
|
||||
|
||||
let x = TestAutodiffTensor::from_floats([[[
|
||||
let x = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0.5388, 0.0676, 0.7122, 0.8316, 0.0653],
|
||||
[0.9154, 0.1536, 0.9089, 0.8016, 0.7518],
|
||||
[0.2073, 0.0501, 0.8811, 0.5604, 0.5075],
|
||||
|
@ -145,7 +145,7 @@ mod tests {
|
|||
[0.3391, 0.2230, 0.4610, 0.5365, 0.6880],
|
||||
]]])
|
||||
.require_grad();
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats([[[
|
||||
let x_grad_expected = TestAutodiffTensor::from_floats_devauto([[[
|
||||
[0., 0., 0., 3., 0.],
|
||||
[4., 0., 2., 1., 0.],
|
||||
[0., 0., 0., 0., 0.],
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::from([1.0, 7.0]);
|
||||
let data_2 = Data::from([4.0, 7.0]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2.clone()).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2.clone()).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().mul(tensor_2.clone());
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -26,7 +26,7 @@ mod tests {
|
|||
fn should_diff_mul_scalar() {
|
||||
let data = Data::from([2.0, 5.0]);
|
||||
|
||||
let tensor = TestAutodiffTensor::from_data(data).require_grad();
|
||||
let tensor = TestAutodiffTensor::from_data_devauto(data).require_grad();
|
||||
let tensor_out = tensor.clone().mul_scalar(4.0);
|
||||
|
||||
let grads = tensor_out.backward();
|
||||
|
@ -42,9 +42,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().mul(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.mul(tensor_3);
|
||||
|
|
|
@ -9,8 +9,8 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let with_move = || {
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2.clone()).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2.clone()).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.clone().matmul(tensor_2.clone());
|
||||
|
@ -51,8 +51,8 @@ mod tests {
|
|||
(grad_1, grad_2)
|
||||
};
|
||||
let without_move = || {
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2.clone()).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1.clone()).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2.clone()).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.clone().matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [2.0, 3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().neg());
|
||||
let tensor_4 = tensor_3.neg();
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().powf(0.4));
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -7,7 +7,7 @@ mod tests {
|
|||
fn should_diff_recip() {
|
||||
let data = Data::from([2.0, 5.0, 0.4]);
|
||||
|
||||
let tensor = TestAutodiffTensor::from_data(data).require_grad();
|
||||
let tensor = TestAutodiffTensor::from_data_devauto(data).require_grad();
|
||||
let tensor_out = tensor.clone().recip();
|
||||
|
||||
let grads = tensor_out.backward();
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [-2.0, -3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, -7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = activation::relu(tensor_3);
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [2.0, 3.0]]);
|
||||
let data_2: Data<f32, 1> = Data::from([4.0, 7.0, 2.0, 3.0]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_2.clone().reshape([2, 2]);
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_3);
|
||||
|
|
|
@ -6,9 +6,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_select_grad() {
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
.require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 1, Int>::from_data(Data::from([1, 0]));
|
||||
let indices = Tensor::<TestAutodiffBackend, 1, Int>::from_data_devauto(Data::from([1, 0]));
|
||||
|
||||
let tensor_2 = tensor_1.clone().matmul(tensor_1.clone().transpose());
|
||||
let tensor_3 = tensor_1.clone().select(0, indices);
|
||||
|
@ -27,11 +27,12 @@ mod tests {
|
|||
#[test]
|
||||
fn test_select_assign_grad() {
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_data(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))
|
||||
.require_grad();
|
||||
let values = TestAutodiffTensor::from_data(Data::from([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
|
||||
.require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 1, Int>::from_data(Data::from([1, 0]));
|
||||
let values =
|
||||
TestAutodiffTensor::from_data_devauto(Data::from([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]))
|
||||
.require_grad();
|
||||
let indices = Tensor::<TestAutodiffBackend, 1, Int>::from_data_devauto(Data::from([1, 0]));
|
||||
|
||||
let tensor_2 = tensor_1.clone().matmul(tensor_1.clone().transpose());
|
||||
let tensor_3 = tensor_1.clone().select_assign(0, indices, values.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().sin());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1: Data<f32, 2> = Data::from([[1.0, 7.0], [2.0, 3.0]]);
|
||||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0, 100.0], [2.0, 3.0, 15.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_2.clone().slice([0..2, 0..2]);
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_3);
|
||||
|
@ -31,9 +31,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_assigned: Data<f32, 2> = Data::from([[9.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_assigned = TestAutodiffTensor::from_data(data_assigned).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_assigned = TestAutodiffTensor::from_data_devauto(data_assigned).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = tensor_3.slice_assign([0..1, 0..1], tensor_assigned);
|
||||
|
@ -54,9 +54,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[9.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_5 = tensor_2.clone().slice([0..1, 0..1]);
|
||||
|
|
|
@ -7,8 +7,8 @@ mod tests {
|
|||
fn test_softmax_grad() {
|
||||
let data_1 = Data::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data(data_2).require_grad();
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = activation::softmax(tensor_3, 1).matmul(tensor_2.clone());
|
||||
|
@ -29,8 +29,8 @@ mod tests {
|
|||
fn test_log_softmax_grad() {
|
||||
let data_1 = Data::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data(data_2).require_grad();
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = activation::log_softmax(tensor_3, 1).matmul(tensor_2.clone());
|
||||
|
@ -52,8 +52,8 @@ mod tests {
|
|||
let data_1 = Data::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data(data_2).require_grad();
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone());
|
||||
let tensor_4 = activation::softmax(tensor_3, 1).matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().sqrt());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::from([2.0, 5.0]);
|
||||
let data_2 = Data::from([4.0, 1.0]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().sub(tensor_2.clone());
|
||||
let grads = tensor_3.backward();
|
||||
|
@ -25,7 +25,7 @@ mod tests {
|
|||
#[test]
|
||||
fn should_diff_sub_scalar() {
|
||||
let data = Data::from([2.0, 10.0]);
|
||||
let tensor = TestAutodiffTensor::from_data(data).require_grad();
|
||||
let tensor = TestAutodiffTensor::from_data_devauto(data).require_grad();
|
||||
let tensor_out = tensor.clone().sub_scalar(5.0);
|
||||
let grads = tensor_out.backward();
|
||||
|
||||
|
@ -41,9 +41,9 @@ mod tests {
|
|||
let data_2: Data<f32, 2> = Data::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
let data_3: Data<f32, 2> = Data::from([[2.0, 2.0], [2.0, 2.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data(data_3).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
let tensor_3 = TestAutodiffTensor::from_data_devauto(data_3).require_grad();
|
||||
|
||||
let tensor_4 = tensor_1.clone().sub(tensor_2.clone());
|
||||
let tensor_5 = tensor_4.sub(tensor_3).sub_scalar(5.0);
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[0.0, 1.0], [3.0, 4.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[6.0, 7.0], [9.0, 10.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().tanh());
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone());
|
||||
|
|
|
@ -8,8 +8,8 @@ mod tests {
|
|||
let data_1 = Data::<f32, 2>::from([[1.0, 7.0], [2.0, 3.0]]);
|
||||
let data_2 = Data::<f32, 2>::from([[4.0, 7.0], [2.0, 3.0]]);
|
||||
|
||||
let tensor_1 = TestAutodiffTensor::from_data(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data(data_2).require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_data_devauto(data_1).require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_data_devauto(data_2).require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().transpose());
|
||||
let tensor_4 = tensor_3.transpose();
|
||||
|
@ -24,12 +24,16 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_diff_swap_dims() {
|
||||
let tensor_1 =
|
||||
TestAutodiffTensor::from_floats([[[0.0, 1.0], [3.0, 4.0]], [[6.0, 7.0], [9.0, 10.0]]])
|
||||
.require_grad();
|
||||
let tensor_2 =
|
||||
TestAutodiffTensor::from_floats([[[1.0, 4.0], [2.0, 5.0]], [[7.0, 10.0], [8.0, 11.0]]])
|
||||
.require_grad();
|
||||
let tensor_1 = TestAutodiffTensor::from_floats_devauto([
|
||||
[[0.0, 1.0], [3.0, 4.0]],
|
||||
[[6.0, 7.0], [9.0, 10.0]],
|
||||
])
|
||||
.require_grad();
|
||||
let tensor_2 = TestAutodiffTensor::from_floats_devauto([
|
||||
[[1.0, 4.0], [2.0, 5.0]],
|
||||
[[7.0, 10.0], [8.0, 11.0]],
|
||||
])
|
||||
.require_grad();
|
||||
|
||||
let tensor_3 = tensor_1.clone().matmul(tensor_2.clone().swap_dims(0, 2));
|
||||
let tensor_4 = tensor_3.matmul(tensor_2.clone().swap_dims(1, 2));
|
||||
|
|
|
@ -47,7 +47,7 @@ impl<B: Backend> Batcher<MNISTItem, MNISTBatch<B>> for MNISTBatcher<B> {
|
|||
let images = items
|
||||
.iter()
|
||||
.map(|item| Data::<f32, 2>::from(item.image))
|
||||
.map(|data| Tensor::<B, 2>::from_data(data.convert()))
|
||||
.map(|data| Tensor::<B, 2>::from_data_devauto(data.convert()))
|
||||
.map(|tensor| tensor.reshape([1, 28, 28]))
|
||||
// Normalize: make between [0,1] and make the mean=0 and std=1
|
||||
// values mean=0.1307,std=0.3081 are from the PyTorch MNIST example
|
||||
|
@ -57,7 +57,7 @@ impl<B: Backend> Batcher<MNISTItem, MNISTBatch<B>> for MNISTBatcher<B> {
|
|||
|
||||
let targets = items
|
||||
.iter()
|
||||
.map(|item| Tensor::<B, 1, Int>::from_data(Data::from([(item.label as i64).elem()])))
|
||||
.map(|item| Tensor::<B, 1, Int>::from_data_devauto(Data::from([(item.label as i64).elem()])))
|
||||
.collect();
|
||||
|
||||
let images = Tensor::cat(images, 0).to_device(&self.device);
|
||||
|
|
|
@ -77,14 +77,14 @@ pub struct ModelConfig {
|
|||
|
||||
impl ModelConfig {
|
||||
/// Returns the initialized model.
|
||||
pub fn init<B: Backend>(&self) -> Model<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Model<B> {
|
||||
Model {
|
||||
conv1: Conv2dConfig::new([1, 8], [3, 3]).init(),
|
||||
conv2: Conv2dConfig::new([8, 16], [3, 3]).init(),
|
||||
conv1: Conv2dConfig::new([1, 8], [3, 3]).init(device),
|
||||
conv2: Conv2dConfig::new([8, 16], [3, 3]).init(device),
|
||||
pool: AdaptiveAvgPool2dConfig::new([8, 8]).init(),
|
||||
activation: ReLU::new(),
|
||||
linear1: LinearConfig::new(16 * 8 * 8, self.hidden_size).init(),
|
||||
linear2: LinearConfig::new(self.hidden_size, self.num_classes).init(),
|
||||
linear1: LinearConfig::new(16 * 8 * 8, self.hidden_size).init(device),
|
||||
linear2: LinearConfig::new(self.hidden_size, self.num_classes).init(device),
|
||||
dropout: DropoutConfig::new(self.dropout).init(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,9 +47,9 @@ config. Therefore, initialization methods should be implemented on the config st
|
|||
```rust, ignore
|
||||
impl MyModuleConfig {
|
||||
/// Create a module with random weights.
|
||||
pub fn init(&self) -> MyModule {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> MyModule {
|
||||
MyModule {
|
||||
linear: LinearConfig::new(self.d_model, self.d_ff).init(),
|
||||
linear: LinearConfig::new(self.d_model, self.d_ff).init(device),
|
||||
dropout: DropoutConfig::new(self.dropout).init(),
|
||||
}
|
||||
}
|
||||
|
@ -70,5 +70,7 @@ impl MyModuleConfig {
|
|||
Then we could add this line to the above `main`:
|
||||
|
||||
```rust, ignore
|
||||
let my_module = config.init();
|
||||
use burn::backend::Wgpu;
|
||||
let device = Default::default();
|
||||
let my_module = config.init::<Wgpu>(&device);
|
||||
```
|
||||
|
|
|
@ -38,8 +38,8 @@ Those operations are available for all tensor kinds: `Int`, `Float`, and `Bool`.
|
|||
|
||||
| Burn | PyTorch Equivalent |
|
||||
| ---------------------------------------- | ------------------------------------ |
|
||||
| `Tensor::empty(shape)` | `torch.empty(shape)` |
|
||||
| `Tensor::empty_device(shape, device)` | `torch.empty(shape, device=device)` |
|
||||
| `Tensor::empty_devauto(shape)` | `torch.empty(shape)` |
|
||||
| `Tensor::empty(shape, device)` | `torch.empty(shape, device=device)` |
|
||||
| `tensor.dims()` | `tensor.size()` |
|
||||
| `tensor.shape()` | `tensor.shape` |
|
||||
| `tensor.reshape(shape)` | `tensor.view(shape)` |
|
||||
|
@ -58,8 +58,8 @@ Those operations are available for all tensor kinds: `Int`, `Float`, and `Bool`.
|
|||
| `Tensor::cat(tensors, dim)` | `torch.cat(tensors, dim)` |
|
||||
| `tensor.into_data()` | N/A |
|
||||
| `tensor.to_data()` | N/A |
|
||||
| `Tensor::from_data(data)` | N/A |
|
||||
| `Tensor::from_data_device(data, device)` | N/A |
|
||||
| `Tensor::from_data_devauto(data)` | N/A |
|
||||
| `Tensor::from_data(data, device)` | N/A |
|
||||
| `tensor.into_primitive()` | N/A |
|
||||
| `Tensor::from_primitive(primitive)` | N/A |
|
||||
| `Tensor::stack(tensors, dim)` | `torch.stack(tensors, dim)` |
|
||||
|
@ -81,11 +81,11 @@ Those operations are available for numeric tensor kinds: `Float` and `Int`.
|
|||
| `tensor * scalar` or `tensor.mul_scalar(scalar)` | `tensor * scalar` |
|
||||
| `-tensor` or `tensor.neg()` | `-tensor` |
|
||||
| `Tensor::zeros_devauto(shape)` | `torch.zeros(shape)` |
|
||||
| `Tensor::zeros_device(shape, device)` | `torch.zeros(shape, device=device)` |
|
||||
| `Tensor::ones(shape)` | `torch.ones(shape)` |
|
||||
| `Tensor::ones_device(shape, device)` | `torch.ones(shape, device=device)` |
|
||||
| `Tensor::full(shape, fill_value)` | `torch.full(shape, fill_value)` |
|
||||
| `Tensor::full_device(shape, fill_value, device)` | `torch.full(shape, fill_value, device=device)` |
|
||||
| `Tensor::zeros(shape, device)` | `torch.zeros(shape, device=device)` |
|
||||
| `Tensor::ones_devauto(shape)` | `torch.ones(shape)` |
|
||||
| `Tensor::ones(shape, device)` | `torch.ones(shape, device=device)` |
|
||||
| `Tensor::full_devauto(shape, fill_value)` | `torch.full(shape, fill_value)` |
|
||||
| `Tensor::full(shape, fill_value, device)` | `torch.full(shape, fill_value, device=device)` |
|
||||
| `tensor.mean()` | `tensor.mean()` |
|
||||
| `tensor.sum()` | `tensor.sum()` |
|
||||
| `tensor.mean_dim(dim)` | `tensor.mean(dim)` |
|
||||
|
@ -136,7 +136,8 @@ Those operations are only available for `Float` tensors.
|
|||
| `tensor.cos()` | `tensor.cos()` |
|
||||
| `tensor.sin()` | `tensor.sin()` |
|
||||
| `tensor.tanh()` | `tensor.tanh()` |
|
||||
| `tensor.from_floats(floats)` | N/A |
|
||||
| `tensor.from_floats_devauto(floats)` | N/A |
|
||||
| `tensor.from_floats(floats, device)` | N/A |
|
||||
| `tensor.int()` | Similar to `tensor.to(torch.long)` |
|
||||
| `tensor.zeros_like()` | `torch.zeros_like(tensor)` |
|
||||
| `tensor.ones_like()` | `torch.ones_like(tensor)` |
|
||||
|
@ -149,8 +150,8 @@ Those operations are only available for `Float` tensors.
|
|||
| `tensor.var_bias(dim)` | N/A |
|
||||
| `tensor.var_mean(dim)` | N/A |
|
||||
| `tensor.var_mean_bias(dim)` | N/A |
|
||||
| `tensor.random(shape, distribution)` | N/A |
|
||||
| `tensor.random_device(shape, distribution, device)` | N/A |
|
||||
| `tensor.random_devauto(shape, distribution)` | N/A |
|
||||
| `tensor.random(shape, distribution, device)` | N/A |
|
||||
| `tensor.to_full_precision()` | `tensor.to(torch.float)` |
|
||||
| `tensor.from_full_precision(tensor)` | N/A |
|
||||
|
||||
|
@ -162,10 +163,10 @@ Those operations are only available for `Int` tensors.
|
|||
| --------------------------------------------- | ------------------------------------------------------- |
|
||||
| `tensor.from_ints(ints)` | N/A |
|
||||
| `tensor.float()` | Similar to `tensor.to(torch.float)` |
|
||||
| `tensor.arange(5..10)` | `torch.arange(start=5, end=10)` |
|
||||
| `tensor.arange_device(5..10, device)` | `torch.arange(start=5, end=10, device=device)` |
|
||||
| `tensor.arange_step(5..10, 2)` | `torch.arange(start=5, end=10, step=2)` |
|
||||
| `tensor.arange_step_device(5..10, 2, device)` | `torch.arange(start=5, end=10, step=2, device=device)` |
|
||||
| `tensor.arange_devauto(5..10)` | `torch.arange(start=5, end=10)` |
|
||||
| `tensor.arange(5..10, device)` | `torch.arange(start=5, end=10, device=device)` |
|
||||
| `tensor.arange_step_devauto(5..10, 2)` | `torch.arange(start=5, end=10, step=2)` |
|
||||
| `tensor.arange_step(5..10, 2, device)` | `torch.arange(start=5, end=10, step=2, device=device)` |
|
||||
|
||||
# Bool Operations
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ pub struct MnistTrainingConfig {
|
|||
pub optimizer: AdamConfig,
|
||||
}
|
||||
|
||||
pub fn run<B: AutodiffBackend>(device: B::Device) {
|
||||
pub fn run<B: AutodiffBackend>(device: &B::Device) {
|
||||
// Create the configuration.
|
||||
let config_model = ModelConfig::new(10, 1024);
|
||||
let config_optimizer = AdamConfig::new();
|
||||
|
@ -36,7 +36,7 @@ pub fn run<B: AutodiffBackend>(device: B::Device) {
|
|||
B::seed(config.seed);
|
||||
|
||||
// Create the model and optimizer.
|
||||
let mut model = config.model.init();
|
||||
let mut model = config.model.init(device);
|
||||
let mut optim = config.optimizer.init();
|
||||
|
||||
// Create the batcher.
|
||||
|
|
|
@ -54,7 +54,7 @@ type Backend = Wgpu;
|
|||
|
||||
fn main() {
|
||||
// Creation of two tensors, the first with explicit values and the second one with ones, with the same shape as the first
|
||||
let tensor_1 = Tensor::<Backend, 2>::from_data([[2., 3.], [4., 5.]]);
|
||||
let tensor_1 = Tensor::<Backend, 2>::from_data_devauto([[2., 3.], [4., 5.]]);
|
||||
let tensor_2 = Tensor::<Backend, 2>::ones_like(&tensor_1);
|
||||
|
||||
// Print the element-wise addition (done with the WGPU backend) of the two tensors.
|
||||
|
|
|
@ -102,15 +102,16 @@ Here's how to use the imported model in your application:
|
|||
mod model;
|
||||
|
||||
use burn::tensor;
|
||||
use burn_ndarray::NdArray;
|
||||
use burn_ndarray::{NdArray, NdArrayDevice};
|
||||
use model::mnist::Model;
|
||||
|
||||
fn main() {
|
||||
// Initialize a new model instance
|
||||
let model: Model<NdArray<f32>> = Model::new();
|
||||
let device = NdArrayDevice::default();
|
||||
let model: Model<NdArray<f32>> = Model::new(&device);
|
||||
|
||||
// Create a sample input tensor (zeros for demonstration)
|
||||
let input = tensor::Tensor::<NdArray<f32>, 4>::zeros([1, 1, 28, 28]);
|
||||
let input = tensor::Tensor::<NdArray<f32>, 4>::zeros([1, 1, 28, 28], &device);
|
||||
|
||||
// Perform inference
|
||||
let output = model.forward(input);
|
||||
|
|
|
@ -137,8 +137,17 @@ impl<F: FloatCandleElement, I: IntCandleElement> TensorOps<Self> for Candle<F, I
|
|||
lhs: FloatTensor<Self, D>,
|
||||
rhs: FloatTensor<Self, D>,
|
||||
) -> FloatTensor<Self, D> {
|
||||
let rhs_contiguous = rhs.tensor.contiguous().unwrap();
|
||||
CandleTensor::new(lhs.tensor.broadcast_matmul(&rhs_contiguous).unwrap())
|
||||
let lhs_contiguous = if !lhs.tensor.is_contiguous() {
|
||||
lhs.tensor.contiguous().unwrap()
|
||||
} else {
|
||||
lhs.tensor
|
||||
};
|
||||
let rhs_contiguous = if !rhs.tensor.is_contiguous() {
|
||||
rhs.tensor.contiguous().unwrap()
|
||||
} else {
|
||||
rhs.tensor
|
||||
};
|
||||
CandleTensor::new(lhs_contiguous.broadcast_matmul(&rhs_contiguous).unwrap())
|
||||
}
|
||||
|
||||
fn swap_dims<const D: usize>(
|
||||
|
|
|
@ -113,7 +113,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_clip_by_value() {
|
||||
let gradient: Tensor<TestBackend, 2> = Tensor::from_floats([
|
||||
let gradient: Tensor<TestBackend, 2> = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
]);
|
||||
|
@ -128,7 +128,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_clip_by_norm() {
|
||||
let gradient: Tensor<TestBackend, 2> = Tensor::from_floats([
|
||||
let gradient: Tensor<TestBackend, 2> = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
]);
|
||||
|
|
|
@ -226,7 +226,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn tensor_load_record_setting() {
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones([3, 3]);
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones_devauto([3, 3]);
|
||||
|
||||
let byte_recorder = BinBytesRecorder::<FullPrecisionSettings>::default();
|
||||
let bytes = byte_recorder
|
||||
|
|
|
@ -228,7 +228,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_load_record_setting() {
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones([3, 3]);
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones_devauto([3, 3]);
|
||||
|
||||
let byte_recorder = BinBytesRecorder::<FullPrecisionSettings>::default();
|
||||
let bytes = byte_recorder
|
||||
|
@ -253,7 +253,8 @@ mod tests {
|
|||
#[test]
|
||||
fn test_init_with_record_setting() {
|
||||
let config = LinearConfig::new(32, 32);
|
||||
let module_init = config.init::<TestAutodiffBackend>();
|
||||
let device = Default::default();
|
||||
let module_init = config.init::<TestAutodiffBackend>(&device);
|
||||
|
||||
let record = module_init.clone().into_record();
|
||||
let module_init_with = config.init_with::<TestAutodiffBackend>(record);
|
||||
|
|
|
@ -10,10 +10,10 @@ pub fn generate_autoregressive_mask<B: Backend>(
|
|||
seq_length: usize,
|
||||
device: &B::Device,
|
||||
) -> Tensor<B, 3, Bool> {
|
||||
let mut mask = Tensor::<B, 3, Int>::zeros([1, seq_length, seq_length]);
|
||||
let mut mask = Tensor::<B, 3, Int>::zeros([1, seq_length, seq_length], device);
|
||||
|
||||
for i in 0..(seq_length - 1) {
|
||||
let values = Tensor::<B, 3, Int>::ones([1, 1, seq_length - (i + 1)]);
|
||||
let values = Tensor::<B, 3, Int>::ones_devauto([1, 1, seq_length - (i + 1)]);
|
||||
mask = mask.slice_assign([0..1, i..i + 1, i + 1..seq_length], values);
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,7 @@ pub fn generate_padding_mask<B: Backend>(
|
|||
}
|
||||
}
|
||||
|
||||
let mut tensor = Tensor::zeros([batch_size, max_size]);
|
||||
let mut tensor = Tensor::zeros([batch_size, max_size], device);
|
||||
tensor = tensor.add_scalar(pad_token as i64);
|
||||
|
||||
for (index, tokens) in tokens_list.into_iter().enumerate() {
|
||||
|
@ -70,7 +70,7 @@ pub fn generate_padding_mask<B: Backend>(
|
|||
|
||||
tensor = tensor.slice_assign(
|
||||
[index..index + 1, 0..tokens.len()],
|
||||
Tensor::from_data(Data::new(
|
||||
Tensor::from_data_devauto(Data::new(
|
||||
tokens.into_iter().map(|e| (e as i64).elem()).collect(),
|
||||
Shape::new([1, seq_length]),
|
||||
)),
|
||||
|
|
|
@ -73,12 +73,19 @@ pub struct MhaInput<B: Backend> {
|
|||
}
|
||||
|
||||
impl MultiHeadAttentionConfig {
|
||||
/// Initialize a new [multihead attention](MultiHeadAttention) module
|
||||
/// on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> MultiHeadAttention<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [multihead attention](MultiHeadAttention) module.
|
||||
pub fn init<B: Backend>(&self) -> MultiHeadAttention<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> MultiHeadAttention<B> {
|
||||
let linear = |config: &Self| {
|
||||
nn::LinearConfig::new(config.d_model, config.d_model)
|
||||
.with_initializer(self.initializer.clone())
|
||||
.init()
|
||||
.init(device)
|
||||
};
|
||||
|
||||
MultiHeadAttention {
|
||||
|
@ -340,8 +347,8 @@ mod tests {
|
|||
#[test]
|
||||
fn test_self_attention_shapes() {
|
||||
let [batch_size, seq_length, d_model, n_heads] = [7, 13, 32, 4];
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
|
||||
let input = MhaInput::self_attn(Tensor::random(
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init_devauto::<TestBackend>();
|
||||
let input = MhaInput::self_attn(Tensor::random_devauto(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
));
|
||||
|
@ -363,11 +370,12 @@ mod tests {
|
|||
#[test]
|
||||
fn test_generic_mha_shapes() {
|
||||
let [batch_size, seq_length_1, seq_length_2, d_model, n_heads] = [7, 13, 15, 32, 4];
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads)
|
||||
.init::<TestBackend>(&Default::default());
|
||||
let input = MhaInput::new(
|
||||
Tensor::random([batch_size, seq_length_1, d_model], Distribution::Default),
|
||||
Tensor::random([batch_size, seq_length_2, d_model], Distribution::Default),
|
||||
Tensor::random([batch_size, seq_length_2, d_model], Distribution::Default),
|
||||
Tensor::random_devauto([batch_size, seq_length_1, d_model], Distribution::Default),
|
||||
Tensor::random_devauto([batch_size, seq_length_2, d_model], Distribution::Default),
|
||||
Tensor::random_devauto([batch_size, seq_length_2, d_model], Distribution::Default),
|
||||
);
|
||||
|
||||
let output = mha.forward(input);
|
||||
|
@ -387,19 +395,22 @@ mod tests {
|
|||
#[test]
|
||||
fn test_self_attention_mask_pad() {
|
||||
let [batch_size, seq_length, d_model, n_heads, num_padded] = [3, 6, 32, 2, 2];
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>(&device);
|
||||
|
||||
// Create a padding mask
|
||||
let mask_pad: Tensor<TestBackend, 2, Int> = Tensor::zeros([batch_size, seq_length]);
|
||||
let mask_pad: Tensor<TestBackend, 2, Int> =
|
||||
Tensor::zeros([batch_size, seq_length], &device);
|
||||
let mask_pad = mask_pad.slice_assign(
|
||||
[0..batch_size, seq_length - num_padded..seq_length],
|
||||
Tensor::ones([batch_size, num_padded]),
|
||||
Tensor::ones_devauto([batch_size, num_padded]),
|
||||
);
|
||||
let mask_pad = mask_pad.equal_elem(1);
|
||||
let mask_pad = mask_pad.equal_elem(1).to_device(&device);
|
||||
|
||||
let tensor_1 = Tensor::<TestBackend, 3>::random(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
);
|
||||
// Change the end of the tensor
|
||||
let tensor_2 = tensor_1.clone().slice_assign(
|
||||
|
@ -408,7 +419,11 @@ mod tests {
|
|||
seq_length - num_padded..seq_length,
|
||||
0..d_model,
|
||||
],
|
||||
Tensor::random([batch_size, num_padded, d_model], Distribution::Default),
|
||||
Tensor::random(
|
||||
[batch_size, num_padded, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
),
|
||||
);
|
||||
|
||||
let input_1 = MhaInput::self_attn(tensor_1).mask_pad(mask_pad.clone());
|
||||
|
@ -434,11 +449,13 @@ mod tests {
|
|||
#[test]
|
||||
fn test_autoregressive_mask_should_have_same_output_as_autoregressive_decoding() {
|
||||
let [batch_size, seq_length, d_model, n_heads] = [3, 4, 12, 2];
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let mha = MultiHeadAttentionConfig::new(d_model, n_heads).init::<TestBackend>(&device);
|
||||
|
||||
let tensor = Tensor::<TestBackend, 3>::random(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
);
|
||||
let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &tensor.device());
|
||||
let input = MhaInput::self_attn(tensor.clone()).mask_attn(mask_attn);
|
||||
|
|
|
@ -60,8 +60,14 @@ pub struct Conv1d<B: Backend> {
|
|||
}
|
||||
|
||||
impl Conv1dConfig {
|
||||
/// Initialize a new [conv1d](Conv1d) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Conv1d<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [conv1d](Conv1d) module.
|
||||
pub fn init<B: Backend>(&self) -> Conv1d<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Conv1d<B> {
|
||||
checks::checks_channels_div_groups(self.channels_in, self.channels_out, self.groups);
|
||||
|
||||
let shape = [
|
||||
|
@ -71,14 +77,17 @@ impl Conv1dConfig {
|
|||
];
|
||||
|
||||
let fan_in: usize = self.channels_in / self.groups * self.kernel_size;
|
||||
let weight = self.initializer.init_with(shape, Some(fan_in), None);
|
||||
let weight = self
|
||||
.initializer
|
||||
.init_with(shape, Some(fan_in), None, device);
|
||||
let mut bias = None;
|
||||
|
||||
if self.bias {
|
||||
bias = Some(
|
||||
self.initializer
|
||||
.init_with([self.channels_out], Some(fan_in), None),
|
||||
);
|
||||
bias =
|
||||
Some(
|
||||
self.initializer
|
||||
.init_with([self.channels_out], Some(fan_in), None, device),
|
||||
);
|
||||
}
|
||||
|
||||
Conv1d {
|
||||
|
@ -140,7 +149,7 @@ mod tests {
|
|||
let config = Conv1dConfig::new(5, 5, 5);
|
||||
let k = (config.channels_in * config.kernel_size) as f64;
|
||||
let k = sqrt(config.groups as f64 / k) as f32;
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init_devauto::<TestBackend>();
|
||||
|
||||
conv.weight.to_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
@ -150,7 +159,7 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = Conv1dConfig::new(5, 5, 5).with_initializer(Initializer::Zeros);
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init::<TestBackend>(&Default::default());
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
conv.weight
|
||||
|
|
|
@ -59,8 +59,14 @@ pub struct Conv2d<B: Backend> {
|
|||
}
|
||||
|
||||
impl Conv2dConfig {
|
||||
/// Initialize a new [conv2d](Conv2d) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Conv2d<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [conv2d](Conv2d) module.
|
||||
pub fn init<B: Backend>(&self) -> Conv2d<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Conv2d<B> {
|
||||
checks::checks_channels_div_groups(self.channels[0], self.channels[1], self.groups);
|
||||
|
||||
let shape = [
|
||||
|
@ -71,13 +77,15 @@ impl Conv2dConfig {
|
|||
];
|
||||
|
||||
let fan_in = self.channels[0] / self.groups * self.kernel_size.iter().product::<usize>();
|
||||
let weight = self.initializer.init_with(shape, Some(fan_in), None);
|
||||
let weight = self
|
||||
.initializer
|
||||
.init_with(shape, Some(fan_in), None, device);
|
||||
let mut bias = None;
|
||||
|
||||
if self.bias {
|
||||
bias = Some(
|
||||
self.initializer
|
||||
.init_with([self.channels[1]], Some(fan_in), None),
|
||||
.init_with([self.channels[1]], Some(fan_in), None, device),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -140,7 +148,7 @@ mod tests {
|
|||
let config = Conv2dConfig::new([5, 1], [5, 5]);
|
||||
let k = (config.channels[0] * config.kernel_size[0] * config.kernel_size[1]) as f64;
|
||||
let k = sqrt(config.groups as f64 / k) as f32;
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init_devauto::<TestBackend>();
|
||||
|
||||
conv.weight.to_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
@ -150,7 +158,8 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = Conv2dConfig::new([5, 2], [5, 5]).with_initializer(Initializer::Zeros);
|
||||
let conv = config.init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let conv = config.init::<TestBackend>(&device);
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
conv.weight
|
||||
|
|
|
@ -62,8 +62,15 @@ pub struct ConvTranspose1d<B: Backend> {
|
|||
}
|
||||
|
||||
impl ConvTranspose1dConfig {
|
||||
/// Initialize a new [conv transpose 1d](ConvTranspose1d) module
|
||||
/// on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> ConvTranspose1d<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [conv transpose 1d](ConvTranspose1d) module.
|
||||
pub fn init<B: Backend>(&self) -> ConvTranspose1d<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> ConvTranspose1d<B> {
|
||||
checks::checks_channels_div_groups(self.channels[0], self.channels[1], self.groups);
|
||||
|
||||
let shape = [
|
||||
|
@ -73,13 +80,15 @@ impl ConvTranspose1dConfig {
|
|||
];
|
||||
|
||||
let fan_in = self.channels[1] / self.groups * self.kernel_size;
|
||||
let weight = self.initializer.init_with(shape, Some(fan_in), None);
|
||||
let weight = self
|
||||
.initializer
|
||||
.init_with(shape, Some(fan_in), None, device);
|
||||
let mut bias = None;
|
||||
|
||||
if self.bias {
|
||||
bias = Some(
|
||||
self.initializer
|
||||
.init_with([self.channels[1]], Some(fan_in), None),
|
||||
.init_with([self.channels[1]], Some(fan_in), None, device),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -146,7 +155,7 @@ mod tests {
|
|||
let config = ConvTranspose1dConfig::new([5, 1], 5);
|
||||
let k = (config.channels[1] * config.kernel_size) as f64;
|
||||
let k = sqrt(config.groups as f64 / k) as f32;
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init_devauto::<TestBackend>();
|
||||
|
||||
conv.weight.to_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
@ -156,7 +165,7 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = ConvTranspose1dConfig::new([5, 2], 5).with_initializer(Initializer::Zeros);
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init::<TestBackend>(&Default::default());
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
conv.weight
|
||||
|
|
|
@ -62,8 +62,15 @@ pub struct ConvTranspose2d<B: Backend> {
|
|||
}
|
||||
|
||||
impl ConvTranspose2dConfig {
|
||||
/// Initialize a new [conv transpose 2d](ConvTranspose2d) module
|
||||
/// on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> ConvTranspose2d<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [conv transpose 2d](ConvTranspose2d) module.
|
||||
pub fn init<B: Backend>(&self) -> ConvTranspose2d<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> ConvTranspose2d<B> {
|
||||
checks::checks_channels_div_groups(self.channels[0], self.channels[1], self.groups);
|
||||
|
||||
let shape = [
|
||||
|
@ -74,13 +81,15 @@ impl ConvTranspose2dConfig {
|
|||
];
|
||||
|
||||
let fan_in = self.channels[1] / self.groups * self.kernel_size.iter().product::<usize>();
|
||||
let weight = self.initializer.init_with(shape, Some(fan_in), None);
|
||||
let weight = self
|
||||
.initializer
|
||||
.init_with(shape, Some(fan_in), None, device);
|
||||
let mut bias = None;
|
||||
|
||||
if self.bias {
|
||||
bias = Some(
|
||||
self.initializer
|
||||
.init_with([self.channels[1]], Some(fan_in), None),
|
||||
.init_with([self.channels[1]], Some(fan_in), None, device),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -147,7 +156,7 @@ mod tests {
|
|||
let config = ConvTranspose2dConfig::new([5, 1], [5, 5]);
|
||||
let k = (config.channels[1] * config.kernel_size[0] * config.kernel_size[1]) as f64;
|
||||
let k = sqrt(config.groups as f64 / k) as f32;
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init_devauto::<TestBackend>();
|
||||
|
||||
conv.weight.to_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
@ -158,7 +167,7 @@ mod tests {
|
|||
|
||||
let config =
|
||||
ConvTranspose2dConfig::new([5, 2], [5, 5]).with_initializer(Initializer::Zeros);
|
||||
let conv = config.init::<TestBackend>();
|
||||
let conv = config.init::<TestBackend>(&Default::default());
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
conv.weight
|
||||
|
|
|
@ -64,7 +64,7 @@ mod tests {
|
|||
#[cfg(feature = "std")]
|
||||
#[test]
|
||||
fn with_ad_backend_should_mark_input() {
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones(Shape::new([100, 100]));
|
||||
let tensor = Tensor::<TestAutodiffBackend, 2>::ones_devauto(Shape::new([100, 100]));
|
||||
let dropout = DropoutConfig::new(0.5).init();
|
||||
|
||||
let output = dropout.forward(tensor.clone());
|
||||
|
@ -74,7 +74,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn without_ad_backend_should_not_change_input() {
|
||||
let tensor = Tensor::<TestBackend, 2>::ones(Shape::new([100, 100]));
|
||||
let tensor = Tensor::<TestBackend, 2>::ones_devauto(Shape::new([100, 100]));
|
||||
let dropout = DropoutConfig::new(0.5).init();
|
||||
|
||||
let output = dropout.forward(tensor.clone());
|
||||
|
|
|
@ -33,16 +33,23 @@ pub struct Embedding<B: Backend> {
|
|||
|
||||
impl EmbeddingConfig {
|
||||
/// Initialize a new [embedding](Embedding) module.
|
||||
pub fn init<B: Backend>(&self) -> Embedding<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Embedding<B> {
|
||||
let weight = self
|
||||
.initializer
|
||||
.init([self.n_embedding, self.d_model])
|
||||
.init([self.n_embedding, self.d_model], device)
|
||||
.require_grad();
|
||||
|
||||
Embedding {
|
||||
weight: Param::from(weight),
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize a new [embedding](Embedding) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Embedding<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [embedding](Embedding) module with a [record](EmbeddingRecord).
|
||||
pub fn init_with<B: Backend>(&self, record: EmbeddingRecord<B>) -> Embedding<B> {
|
||||
Embedding {
|
||||
|
@ -74,7 +81,7 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = EmbeddingConfig::new(100, 10);
|
||||
let embed = config.init::<TestBackend>();
|
||||
let embed = config.init_devauto::<TestBackend>();
|
||||
let weights = embed.weight.val().reshape([1000]);
|
||||
let (var_act, mean_act) = weights.var_mean(0);
|
||||
|
||||
|
@ -96,7 +103,7 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = EmbeddingConfig::new(5, 5).with_initializer(Initializer::Zeros);
|
||||
let embed = config.init::<TestBackend>();
|
||||
let embed = config.init::<TestBackend>(&Default::default());
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
embed
|
||||
|
|
|
@ -73,8 +73,12 @@ impl Initializer {
|
|||
/// # Params
|
||||
///
|
||||
/// - shape: Shape of the initiated tensor.
|
||||
pub fn init<B: Backend, const D: usize, S: Into<Shape<D>>>(&self, shape: S) -> Tensor<B, D> {
|
||||
self.init_with(shape, None, None)
|
||||
pub fn init<B: Backend, const D: usize, S: Into<Shape<D>>>(
|
||||
&self,
|
||||
shape: S,
|
||||
device: &B::Device,
|
||||
) -> Tensor<B, D> {
|
||||
self.init_with(shape, None, None, device)
|
||||
}
|
||||
|
||||
/// Inits a tensor of given shape with values depending on initializer kind, with the possibility
|
||||
|
@ -90,29 +94,30 @@ impl Initializer {
|
|||
shape: S,
|
||||
fan_in: Option<usize>,
|
||||
fan_out: Option<usize>,
|
||||
device: &B::Device,
|
||||
) -> Tensor<B, D> {
|
||||
let shape = shape.into();
|
||||
match self {
|
||||
Initializer::Constant { value } => Tensor::<B, D>::full(shape, *value),
|
||||
Initializer::Ones => Tensor::<B, D>::ones(shape),
|
||||
Initializer::Zeros => Tensor::<B, D>::zeros(shape),
|
||||
Initializer::Uniform { min, max } => uniform_draw(shape, *min, *max),
|
||||
Initializer::Normal { mean, std } => normal_draw(shape, *mean, *std),
|
||||
Initializer::Constant { value } => Tensor::<B, D>::full(shape, *value, device),
|
||||
Initializer::Ones => Tensor::<B, D>::ones(shape, device),
|
||||
Initializer::Zeros => Tensor::<B, D>::zeros(shape, device),
|
||||
Initializer::Uniform { min, max } => uniform_draw(shape, *min, *max, device),
|
||||
Initializer::Normal { mean, std } => normal_draw(shape, *mean, *std, device),
|
||||
Initializer::KaimingUniform { gain, fan_out_only } => {
|
||||
let a = sqrt(3.0) * *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
|
||||
uniform_draw(shape, -a, a)
|
||||
uniform_draw(shape, -a, a, device)
|
||||
}
|
||||
Initializer::KaimingNormal { gain, fan_out_only } => {
|
||||
let std = *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
|
||||
normal_draw(shape, 0.0, std)
|
||||
normal_draw(shape, 0.0, std, device)
|
||||
}
|
||||
Initializer::XavierUniform { gain } => {
|
||||
let a = sqrt(3.0) * *gain * self.xavier_std(fan_in, fan_out);
|
||||
uniform_draw(shape, -a, a)
|
||||
uniform_draw(shape, -a, a, device)
|
||||
}
|
||||
Initializer::XavierNormal { gain } => {
|
||||
let std = *gain * self.xavier_std(fan_in, fan_out);
|
||||
normal_draw(shape, 0.0, std)
|
||||
normal_draw(shape, 0.0, std, device)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -148,18 +153,20 @@ fn uniform_draw<B: Backend, const D: usize, S: Into<Shape<D>>>(
|
|||
shape: S,
|
||||
low: f64,
|
||||
high: f64,
|
||||
device: &B::Device,
|
||||
) -> Tensor<B, D> {
|
||||
let distribution = Distribution::Uniform(low, high);
|
||||
Tensor::<B, D>::random(shape, distribution)
|
||||
Tensor::<B, D>::random(shape, distribution, device)
|
||||
}
|
||||
|
||||
fn normal_draw<B: Backend, const D: usize, S: Into<Shape<D>>>(
|
||||
shape: S,
|
||||
mean: f64,
|
||||
std: f64,
|
||||
device: &B::Device,
|
||||
) -> Tensor<B, D> {
|
||||
let distribution = Distribution::Normal(mean, std);
|
||||
Tensor::<B, D>::random(shape, distribution)
|
||||
Tensor::<B, D>::random(shape, distribution, device)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -194,7 +201,7 @@ mod tests {
|
|||
|
||||
let (min, max) = (0.0, 1.0);
|
||||
let uniform = Initializer::Uniform { min, max };
|
||||
let tensor: Tensor<TB, 4> = uniform.init([2, 2, 2, 2]);
|
||||
let tensor: Tensor<TB, 4> = uniform.init([2, 2, 2, 2], &Default::default());
|
||||
|
||||
tensor.into_data().assert_within_range(min..max);
|
||||
}
|
||||
|
@ -204,7 +211,8 @@ mod tests {
|
|||
// seed random generator
|
||||
TB::seed(0);
|
||||
let (mean, std) = (0.0, 1.0);
|
||||
let normal: Tensor<TB, 1> = Initializer::Normal { mean, std }.init([1000]);
|
||||
let normal: Tensor<TB, 1> =
|
||||
Initializer::Normal { mean, std }.init([1000], &Default::default());
|
||||
let (var_act, mean_act) = normal.var_mean(0);
|
||||
|
||||
let var_act: f32 = var_act.into_scalar().elem();
|
||||
|
@ -223,7 +231,8 @@ mod tests {
|
|||
#[test]
|
||||
fn initializer_constant_init() {
|
||||
let value = 5.0;
|
||||
let constants: Tensor<TB, 4> = Initializer::Constant { value }.init([2, 2, 2, 2]);
|
||||
let constants: Tensor<TB, 4> =
|
||||
Initializer::Constant { value }.init([2, 2, 2, 2], &Default::default());
|
||||
constants
|
||||
.sum()
|
||||
.to_data()
|
||||
|
@ -232,7 +241,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn initializer_zeros_init() {
|
||||
let zeros: Tensor<TB, 4> = Initializer::Zeros.init([2, 2, 2, 2]);
|
||||
let zeros: Tensor<TB, 4> = Initializer::Zeros.init([2, 2, 2, 2], &Default::default());
|
||||
zeros
|
||||
.sum()
|
||||
.to_data()
|
||||
|
@ -241,7 +250,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn initializer_ones_init() {
|
||||
let ones: Tensor<TB, 4> = Initializer::Ones.init([2, 2, 2, 2]);
|
||||
let ones: Tensor<TB, 4> = Initializer::Ones.init([2, 2, 2, 2], &Default::default());
|
||||
ones.sum()
|
||||
.to_data()
|
||||
.assert_approx_eq(&Data::from([16.0]), 3);
|
||||
|
@ -259,7 +268,7 @@ mod tests {
|
|||
gain,
|
||||
fan_out_only: false,
|
||||
}
|
||||
.init_with([fan_out, fan_in], Some(fan_in), None);
|
||||
.init_with([fan_out, fan_in], Some(fan_in), None, &Default::default());
|
||||
tensor.into_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
||||
|
@ -276,7 +285,7 @@ mod tests {
|
|||
gain,
|
||||
fan_out_only: false,
|
||||
}
|
||||
.init_with([fan_out, fan_in], Some(fan_in), None);
|
||||
.init_with([fan_out, fan_in], Some(fan_in), None, &Default::default());
|
||||
assert_normal_init(expected_mean, expected_var, &tensor)
|
||||
}
|
||||
|
||||
|
@ -293,7 +302,7 @@ mod tests {
|
|||
gain,
|
||||
fan_out_only: false,
|
||||
}
|
||||
.init_with(shape, Some(fan_in), None);
|
||||
.init_with(shape, Some(fan_in), None, &Default::default());
|
||||
tensor.into_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
||||
|
@ -309,7 +318,7 @@ mod tests {
|
|||
gain,
|
||||
fan_out_only: true,
|
||||
}
|
||||
.init_with([fan_out, fan_in], None, Some(fan_out));
|
||||
.init_with([fan_out, fan_in], None, Some(fan_out), &Default::default());
|
||||
tensor.into_data().assert_within_range(-k..k);
|
||||
}
|
||||
|
||||
|
@ -325,7 +334,7 @@ mod tests {
|
|||
gain,
|
||||
fan_out_only: false,
|
||||
}
|
||||
.init([fan_out, fan_in]);
|
||||
.init([fan_out, fan_in], &Default::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -339,6 +348,7 @@ mod tests {
|
|||
[fan_out, fan_in],
|
||||
Some(fan_in),
|
||||
Some(fan_out),
|
||||
&Default::default(),
|
||||
);
|
||||
|
||||
tensor.into_data().assert_within_range(-bound..bound);
|
||||
|
@ -357,6 +367,7 @@ mod tests {
|
|||
[fan_out, fan_in],
|
||||
Some(fan_in),
|
||||
Some(fan_out),
|
||||
&Default::default(),
|
||||
);
|
||||
assert_normal_init(expected_mean, expected_var, &tensor)
|
||||
}
|
||||
|
@ -368,6 +379,7 @@ mod tests {
|
|||
|
||||
let gain = 2.;
|
||||
let (fan_in, fan_out) = (5, 6);
|
||||
let _: Tensor<TB, 2> = Initializer::XavierUniform { gain }.init([fan_out, fan_in]);
|
||||
let _: Tensor<TB, 2> =
|
||||
Initializer::XavierUniform { gain }.init([fan_out, fan_in], &Default::default());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,17 +37,24 @@ pub struct Linear<B: Backend> {
|
|||
}
|
||||
|
||||
impl LinearConfig {
|
||||
/// Initialize a new [linear](Linear) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Linear<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [linear](Linear) module.
|
||||
pub fn init<B: Backend>(&self) -> Linear<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Linear<B> {
|
||||
let shape = [self.d_input, self.d_output];
|
||||
let weight = self
|
||||
.initializer
|
||||
.init_with(shape, Some(self.d_input), Some(self.d_output));
|
||||
let weight =
|
||||
self.initializer
|
||||
.init_with(shape, Some(self.d_input), Some(self.d_output), device);
|
||||
let bias = if self.bias {
|
||||
Some(self.initializer.init_with(
|
||||
[self.d_output],
|
||||
Some(self.d_input),
|
||||
Some(self.d_output),
|
||||
device,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
|
@ -98,7 +105,7 @@ mod tests {
|
|||
|
||||
let config = LinearConfig::new(5, 5);
|
||||
let k = sqrt(1.0 / config.d_input as f64) as f32;
|
||||
let linear = config.init::<TestBackend>();
|
||||
let linear = config.init_devauto::<TestBackend>();
|
||||
|
||||
assert_eq!(
|
||||
config.initializer,
|
||||
|
@ -115,7 +122,8 @@ mod tests {
|
|||
TestBackend::seed(0);
|
||||
|
||||
let config = LinearConfig::new(5, 5).with_initializer(Initializer::Zeros);
|
||||
let linear = config.init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let linear = config.init::<TestBackend>(&device);
|
||||
|
||||
assert_eq!(config.initializer, Initializer::Zeros);
|
||||
linear
|
||||
|
@ -132,11 +140,12 @@ mod tests {
|
|||
let config = LinearConfig::new(2, 3)
|
||||
.with_initializer(Initializer::Constant { value })
|
||||
.with_bias(false);
|
||||
let linear = config.init();
|
||||
let device = Default::default();
|
||||
let linear = config.init::<TestBackend>(&device);
|
||||
|
||||
let input = Tensor::<TestBackend, 2>::ones(Shape::new([1, 2]));
|
||||
let input = Tensor::<TestBackend, 2>::ones(Shape::new([1, 2]), &device);
|
||||
let result = linear.forward(input);
|
||||
let expected_result = Tensor::<TestBackend, 2>::from_data([[4., 4., 4.]]);
|
||||
let expected_result = Tensor::<TestBackend, 2>::from_data([[4., 4., 4.]], &device);
|
||||
|
||||
assert_eq!(result.into_data(), expected_result.into_data());
|
||||
}
|
||||
|
@ -145,13 +154,15 @@ mod tests {
|
|||
fn test_linear_forward_with_bias() {
|
||||
TestBackend::seed(0);
|
||||
|
||||
let device = Default::default();
|
||||
|
||||
let value = 2.;
|
||||
let config = LinearConfig::new(2, 3).with_initializer(Initializer::Constant { value });
|
||||
let linear = config.init();
|
||||
let linear = config.init::<TestBackend>(&device);
|
||||
|
||||
let input = Tensor::<TestBackend, 2>::ones(Shape::new([1, 2]));
|
||||
let input = Tensor::<TestBackend, 2>::ones(Shape::new([1, 2]), &device);
|
||||
let result = linear.forward(input);
|
||||
let expected_result = Tensor::<TestBackend, 2>::from_data([[6., 6., 6.]]);
|
||||
let expected_result = Tensor::<TestBackend, 2>::from_data([[6., 6., 6.]], &device);
|
||||
|
||||
assert_eq!(result.into_data(), expected_result.into_data());
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ impl BinaryCrossEntropyLossConfig {
|
|||
weights: self
|
||||
.weights
|
||||
.as_ref()
|
||||
.map(|e| Tensor::<B, 1>::from_floats(e.as_slice())),
|
||||
.map(|e| Tensor::<B, 1>::from_floats_devauto(e.as_slice())),
|
||||
smoothing: self.smoothing,
|
||||
logits: self.logits,
|
||||
}
|
||||
|
@ -46,8 +46,7 @@ impl BinaryCrossEntropyLossConfig {
|
|||
if let Some(alpha) = self.smoothing {
|
||||
assert!(
|
||||
(0.0..=1.).contains(&alpha),
|
||||
"Alpha of Cross-entropy loss with smoothed labels should be in interval [0, 1]. \
|
||||
Got {}",
|
||||
"Alpha of Cross-entropy loss with smoothed labels should be in interval [0, 1]. Got {}",
|
||||
alpha
|
||||
);
|
||||
};
|
||||
|
@ -123,8 +122,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_binary_cross_entropy() {
|
||||
let [batch_size] = [4];
|
||||
let logits = Tensor::<TestBackend, 1>::random([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data(Data::from([0, 1, 0, 1]));
|
||||
let logits =
|
||||
Tensor::<TestBackend, 1>::random_devauto([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data_devauto(Data::from([0, 1, 0, 1]));
|
||||
|
||||
let loss_1 = BinaryCrossEntropyLossConfig::new()
|
||||
.init()
|
||||
|
@ -139,8 +139,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_binary_cross_entropy_with_weights() {
|
||||
let [batch_size] = [4];
|
||||
let logits = Tensor::<TestBackend, 1>::random([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data(Data::from([0, 1, 0, 1]));
|
||||
let logits =
|
||||
Tensor::<TestBackend, 1>::random_devauto([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data_devauto(Data::from([0, 1, 0, 1]));
|
||||
let weights = [3., 7.];
|
||||
|
||||
let loss_1 = BinaryCrossEntropyLossConfig::new()
|
||||
|
@ -151,7 +152,7 @@ mod tests {
|
|||
let loss_2 = targets.clone().float() * logits.clone().log()
|
||||
+ (-targets.float() + 1) * (-logits + 1).log();
|
||||
|
||||
let loss_2 = loss_2 * Tensor::from_floats([3., 7., 3., 7.]);
|
||||
let loss_2 = loss_2 * Tensor::from_floats_devauto([3., 7., 3., 7.]);
|
||||
let loss_2 = loss_2.neg().sum() / (3. + 3. + 7. + 7.);
|
||||
loss_1.into_data().assert_approx_eq(&loss_2.into_data(), 3);
|
||||
}
|
||||
|
@ -159,8 +160,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_binary_cross_entropy_with_smoothing() {
|
||||
let [batch_size] = [4];
|
||||
let logits = Tensor::<TestBackend, 1>::random([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data(Data::from([0, 1, 0, 1]));
|
||||
let logits =
|
||||
Tensor::<TestBackend, 1>::random_devauto([batch_size], Distribution::Normal(0., 1.0));
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data_devauto(Data::from([0, 1, 0, 1]));
|
||||
|
||||
let loss_1 = BinaryCrossEntropyLossConfig::new()
|
||||
.with_smoothing(Some(0.1))
|
||||
|
|
|
@ -44,7 +44,7 @@ impl CrossEntropyLossConfig {
|
|||
weights: self
|
||||
.weights
|
||||
.as_ref()
|
||||
.map(|e| Tensor::<B, 1>::from_floats(e.as_slice())),
|
||||
.map(|e| Tensor::<B, 1>::from_floats_devauto(e.as_slice())),
|
||||
smoothing: self.smoothing,
|
||||
logits: self.logits,
|
||||
}
|
||||
|
@ -54,8 +54,7 @@ impl CrossEntropyLossConfig {
|
|||
if let Some(alpha) = self.smoothing {
|
||||
assert!(
|
||||
(0.0..=1.).contains(&alpha),
|
||||
"Alpha of Cross-entropy loss with smoothed labels should be in interval [0, 1]. \
|
||||
Got {}",
|
||||
"Alpha of Cross-entropy loss with smoothed labels should be in interval [0, 1]. Got {}",
|
||||
alpha
|
||||
);
|
||||
};
|
||||
|
@ -168,10 +167,10 @@ impl<B: Backend> CrossEntropyLoss<B> {
|
|||
) -> Tensor<B, 2> {
|
||||
let [batch_size, nr_classes] = shape;
|
||||
let device = &targets.device();
|
||||
let targets_matrix = Tensor::<B, 2>::zeros_device(shape, device).scatter(
|
||||
let targets_matrix = Tensor::<B, 2>::zeros(shape, device).scatter(
|
||||
1,
|
||||
targets.reshape([batch_size, 1]),
|
||||
Tensor::ones_device([batch_size, 1], device),
|
||||
Tensor::ones([batch_size, 1], device),
|
||||
);
|
||||
targets_matrix * (1. - alpha) + alpha / nr_classes as f32
|
||||
}
|
||||
|
@ -227,12 +226,13 @@ mod tests {
|
|||
macro_rules! setup {
|
||||
() => {{
|
||||
let [batch_size, num_targets] = [4, 5];
|
||||
let logits = Tensor::<TestBackend, 2>::random(
|
||||
let logits = Tensor::<TestBackend, 2>::random_devauto(
|
||||
[batch_size, num_targets],
|
||||
Distribution::Normal(0., 1.0),
|
||||
);
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data(Data::from([2, 0, 4, 1]));
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data(Data::from([
|
||||
let targets =
|
||||
Tensor::<TestBackend, 1, Int>::from_data_devauto(Data::from([2, 0, 4, 1]));
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data_devauto(Data::from([
|
||||
[0.0, 0.0, 1.0, 0.0, 0.0],
|
||||
[1.0, 0.0, 0.0, 0.0, 0.0],
|
||||
[0.0, 0.0, 0.0, 0.0, 1.0],
|
||||
|
@ -245,14 +245,14 @@ mod tests {
|
|||
macro_rules! setup_padded {
|
||||
() => {{
|
||||
let [batch_size, num_targets, pad_index] = [4, 5, 1];
|
||||
let logits = Tensor::<TestBackend, 2>::random(
|
||||
let logits = Tensor::<TestBackend, 2>::random_devauto(
|
||||
[batch_size, num_targets],
|
||||
Distribution::Normal(0., 1.0),
|
||||
);
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data(
|
||||
let targets = Tensor::<TestBackend, 1, Int>::from_data_devauto(
|
||||
Data::<i64, 1>::from([2, 0, 4, pad_index as i64]).convert(),
|
||||
);
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data(Data::from([
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data_devauto(Data::from([
|
||||
[0.0, 0.0, 0.0, 0.0, 0.0],
|
||||
[1.0, 0.0, 0.0, 0.0, 0.0],
|
||||
[0.0, 0.0, 0.0, 0.0, 1.0],
|
||||
|
@ -273,7 +273,7 @@ mod tests {
|
|||
let tensor = log_softmax(logits, 1);
|
||||
let loss_2 = tensor
|
||||
* targets_logits
|
||||
* Tensor::<TestBackend, 1>::from_floats(weights.as_slice())
|
||||
* Tensor::<TestBackend, 1>::from_floats_devauto(weights.as_slice())
|
||||
.unsqueeze()
|
||||
.repeat(0, 4);
|
||||
let loss_2 = loss_2.sum().neg() / (1. + 2. + 3. + 5.);
|
||||
|
@ -358,7 +358,7 @@ mod tests {
|
|||
let (logits, targets, _) = setup!();
|
||||
let smoothed_targets =
|
||||
CrossEntropyLoss::compute_smoothed_targets(logits.dims(), targets, 0.05);
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data(Data::from([
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data_devauto(Data::from([
|
||||
[0.01, 0.01, 0.96, 0.01, 0.01],
|
||||
[0.96, 0.01, 0.01, 0.01, 0.01],
|
||||
[0.01, 0.01, 0.01, 0.01, 0.96],
|
||||
|
@ -376,7 +376,7 @@ mod tests {
|
|||
.with_smoothing(Some(0.05))
|
||||
.init()
|
||||
.forward(logits.clone(), targets);
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data(Data::from([
|
||||
let targets_logits = Tensor::<TestBackend, 2>::from_data_devauto(Data::from([
|
||||
[0.01, 0.01, 0.96, 0.01, 0.01],
|
||||
[0.96, 0.01, 0.01, 0.01, 0.01],
|
||||
[0.01, 0.01, 0.01, 0.01, 0.96],
|
||||
|
|
|
@ -60,9 +60,11 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_mse_loss() {
|
||||
let logits = Tensor::<TestBackend, 2>::from_data(Data::from([[1.0, 2.0], [3.0, 4.0]]));
|
||||
let logits =
|
||||
Tensor::<TestBackend, 2>::from_data_devauto(Data::from([[1.0, 2.0], [3.0, 4.0]]));
|
||||
|
||||
let targets = Tensor::<TestBackend, 2>::from_data(Data::from([[2.0, 1.0], [3.0, 2.0]]));
|
||||
let targets =
|
||||
Tensor::<TestBackend, 2>::from_data_devauto(Data::from([[2.0, 1.0], [3.0, 2.0]]));
|
||||
|
||||
let mse = MSELoss::new();
|
||||
let loss_no_reduction = mse.forward_no_reduction(logits.clone(), targets.clone());
|
||||
|
|
|
@ -33,13 +33,19 @@ pub struct BatchNorm<B: Backend, const D: usize> {
|
|||
}
|
||||
|
||||
impl BatchNormConfig {
|
||||
/// Initialize a new [batch norm](BatchNorm) module.
|
||||
pub fn init<B: Backend, const D: usize>(&self) -> BatchNorm<B, D> {
|
||||
let gamma = Tensor::ones([self.num_features]);
|
||||
let beta = Tensor::zeros([self.num_features]);
|
||||
/// Initialize a new [batch norm](BatchNorm) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend, const D: usize>(&self) -> BatchNorm<B, D> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
let running_mean = Tensor::zeros([self.num_features]);
|
||||
let running_var = Tensor::ones([self.num_features]);
|
||||
/// Initialize a new [batch norm](BatchNorm) module.
|
||||
pub fn init<B: Backend, const D: usize>(&self, device: &B::Device) -> BatchNorm<B, D> {
|
||||
let gamma = Tensor::ones([self.num_features], device);
|
||||
let beta = Tensor::zeros([self.num_features], device);
|
||||
|
||||
let running_mean = Tensor::zeros([self.num_features], device);
|
||||
let running_var = Tensor::ones([self.num_features], device);
|
||||
|
||||
BatchNorm {
|
||||
gamma: Param::from(gamma),
|
||||
|
@ -184,10 +190,16 @@ mod tests_1d {
|
|||
use burn_tensor::Data;
|
||||
|
||||
#[test]
|
||||
fn batch_norm_forward_train() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 1>();
|
||||
fn default_device_initialization() {
|
||||
let _module = BatchNormConfig::new(3).init_devauto::<TestAutodiffBackend, 1>();
|
||||
}
|
||||
|
||||
let output = module.forward(input_tensor());
|
||||
#[test]
|
||||
fn batch_norm_forward_train() {
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 1>(&device);
|
||||
|
||||
let output = module.forward(input_tensor(&device));
|
||||
|
||||
output.to_data().assert_approx_eq(
|
||||
&Data::from([
|
||||
|
@ -208,11 +220,12 @@ mod tests_1d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_forward_inference() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 1>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 1>(&device);
|
||||
|
||||
module.forward(input_tensor());
|
||||
module.forward(input_tensor(&device));
|
||||
let module = module.valid();
|
||||
let output = module.forward(input_tensor());
|
||||
let output = module.forward(input_tensor(&device));
|
||||
|
||||
output.to_data().assert_approx_eq(
|
||||
&Data::from([
|
||||
|
@ -223,11 +236,14 @@ mod tests_1d {
|
|||
);
|
||||
}
|
||||
|
||||
fn input_tensor<B: Backend>() -> Tensor<B, 3> {
|
||||
Tensor::<B, 3>::from_floats([
|
||||
[[0.9601, 0.7277], [0.6272, 0.9034], [0.9378, 0.7230]],
|
||||
[[0.6356, 0.1362], [0.0249, 0.9509], [0.6600, 0.5945]],
|
||||
])
|
||||
fn input_tensor<B: Backend>(device: &B::Device) -> Tensor<B, 3> {
|
||||
Tensor::<B, 3>::from_floats(
|
||||
[
|
||||
[[0.9601, 0.7277], [0.6272, 0.9034], [0.9378, 0.7230]],
|
||||
[[0.6356, 0.1362], [0.0249, 0.9509], [0.6600, 0.5945]],
|
||||
],
|
||||
device,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -240,9 +256,10 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_forward_train() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
|
||||
let output = module.forward(input_tensor());
|
||||
let output = module.forward(input_tensor(&device));
|
||||
|
||||
output.to_data().assert_approx_eq(
|
||||
&Data::from([
|
||||
|
@ -263,11 +280,12 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_forward_inference() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
|
||||
module.forward(input_tensor());
|
||||
module.forward(input_tensor(&device));
|
||||
let module = module.valid();
|
||||
let output = module.forward(input_tensor());
|
||||
let output = module.forward(input_tensor(&device));
|
||||
|
||||
output.to_data().assert_approx_eq(
|
||||
&Data::from([
|
||||
|
@ -288,9 +306,10 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_running_mean() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
|
||||
let _output = module.forward(input_tensor());
|
||||
let _output = module.forward(input_tensor(&device));
|
||||
|
||||
let running_mean = module.running_mean.value_sync();
|
||||
|
||||
|
@ -302,9 +321,10 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_running_var() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
|
||||
let _output = module.forward(input_tensor());
|
||||
let _output = module.forward(input_tensor(&device));
|
||||
|
||||
let running_var = module.running_var.value_sync();
|
||||
|
||||
|
@ -316,9 +336,10 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_running_mean_inner_module() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
|
||||
let _output = module.forward(input_tensor());
|
||||
let _output = module.forward(input_tensor(&device));
|
||||
|
||||
let module_valid = module.valid();
|
||||
let running_mean = module_valid.running_mean.value();
|
||||
|
@ -331,8 +352,9 @@ mod tests_2d {
|
|||
|
||||
#[test]
|
||||
fn batch_norm_grads() {
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>();
|
||||
let input = input_tensor().require_grad();
|
||||
let device = Default::default();
|
||||
let module = BatchNormConfig::new(3).init::<TestAutodiffBackend, 2>(&device);
|
||||
let input = input_tensor(&device).require_grad();
|
||||
|
||||
let output = module.forward(input.clone());
|
||||
|
||||
|
@ -371,18 +393,21 @@ mod tests_2d {
|
|||
);
|
||||
}
|
||||
|
||||
fn input_tensor<B: Backend>() -> Tensor<B, 4> {
|
||||
Tensor::<B, 4>::from_floats([
|
||||
fn input_tensor<B: Backend>(device: &B::Device) -> Tensor<B, 4> {
|
||||
Tensor::<B, 4>::from_floats(
|
||||
[
|
||||
[[0.9601, 0.7277], [0.1270, 0.5441]],
|
||||
[[0.6272, 0.9034], [0.4066, 0.7179]],
|
||||
[[0.9378, 0.7230], [0.3544, 0.9591]],
|
||||
[
|
||||
[[0.9601, 0.7277], [0.1270, 0.5441]],
|
||||
[[0.6272, 0.9034], [0.4066, 0.7179]],
|
||||
[[0.9378, 0.7230], [0.3544, 0.9591]],
|
||||
],
|
||||
[
|
||||
[[0.6356, 0.1362], [0.1333, 0.7287]],
|
||||
[[0.0249, 0.9509], [0.3791, 0.2481]],
|
||||
[[0.6600, 0.5945], [0.5424, 0.4767]],
|
||||
],
|
||||
],
|
||||
[
|
||||
[[0.6356, 0.1362], [0.1333, 0.7287]],
|
||||
[[0.0249, 0.9509], [0.3791, 0.2481]],
|
||||
[[0.6600, 0.5945], [0.5424, 0.4767]],
|
||||
],
|
||||
])
|
||||
device,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,8 +46,8 @@ impl GroupNormConfig {
|
|||
);
|
||||
|
||||
let (gamma, beta) = if self.affine {
|
||||
let gamma = Tensor::ones([self.num_channels]).into();
|
||||
let beta = Tensor::zeros([self.num_channels]).into();
|
||||
let gamma = Tensor::ones_devauto([self.num_channels]).into();
|
||||
let beta = Tensor::zeros_devauto([self.num_channels]).into();
|
||||
|
||||
(Some(gamma), Some(beta))
|
||||
} else {
|
||||
|
@ -142,7 +142,7 @@ mod tests {
|
|||
assert!(module.gamma.is_none());
|
||||
assert!(module.beta.is_none());
|
||||
|
||||
let input = Tensor::from_data(Data::from([
|
||||
let input = Tensor::from_data_devauto(Data::from([
|
||||
[
|
||||
[-0.3034, 0.2726, -0.9659],
|
||||
[-1.1845, -1.3236, 0.0172],
|
||||
|
@ -208,7 +208,7 @@ mod tests {
|
|||
.to_data()
|
||||
.assert_approx_eq(&Data::zeros([6]), 3);
|
||||
|
||||
let input = Tensor::from_data(Data::from([
|
||||
let input = Tensor::from_data_devauto(Data::from([
|
||||
[
|
||||
[0.3345, 0.4429, 0.6639],
|
||||
[0.5041, 0.4175, 0.8437],
|
||||
|
|
|
@ -28,9 +28,15 @@ pub struct LayerNorm<B: Backend> {
|
|||
|
||||
impl LayerNormConfig {
|
||||
/// Initialize a new [layer norm](LayerNorm) module on an automatically selected device.
|
||||
pub fn init<B: Backend>(&self) -> LayerNorm<B> {
|
||||
let gamma = Tensor::ones([self.d_model]);
|
||||
let beta = Tensor::zeros([self.d_model]);
|
||||
pub fn init_devauto<B: Backend>(&self) -> LayerNorm<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [layer norm](LayerNorm) module.
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> LayerNorm<B> {
|
||||
let gamma = Tensor::ones([self.d_model], device);
|
||||
let beta = Tensor::zeros([self.d_model], device);
|
||||
|
||||
LayerNorm {
|
||||
gamma: Param::from(gamma),
|
||||
|
@ -78,12 +84,21 @@ mod tests {
|
|||
#[cfg(not(feature = "std"))]
|
||||
use crate::TestBackend;
|
||||
|
||||
#[test]
|
||||
fn layer_default_initialization() {
|
||||
let _module = LayerNormConfig::new(10).init_devauto::<TestBackend>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn layer_norm_forward() {
|
||||
let module = LayerNormConfig::new(10).init::<TestBackend>();
|
||||
let input = Tensor::from_data(Data::from([[
|
||||
-0.6897, -2.7106, 2.2222, -1.0330, -0.8933, 1.1765, 0.0601, 1.5252, -0.3630, 0.6728,
|
||||
]]));
|
||||
let device = Default::default();
|
||||
let module = LayerNormConfig::new(10).init::<TestBackend>(&device);
|
||||
let input = Tensor::from_data(
|
||||
Data::from([[
|
||||
-0.6897, -2.7106, 2.2222, -1.0330, -0.8933, 1.1765, 0.0601, 1.5252, -0.3630, 0.6728,
|
||||
]]),
|
||||
&device,
|
||||
);
|
||||
|
||||
let output = module.forward(input);
|
||||
|
||||
|
@ -98,13 +113,18 @@ mod tests {
|
|||
#[cfg(feature = "std")]
|
||||
#[test]
|
||||
fn layer_norm_backward() {
|
||||
let module = LayerNormConfig::new(2).init::<TestAutodiffBackend>();
|
||||
let tensor_1 =
|
||||
Tensor::<TestAutodiffBackend, 2>::from_data(Data::from([[0.0, 1.0], [3.0, 4.0]]))
|
||||
.require_grad();
|
||||
let tensor_2 =
|
||||
Tensor::<TestAutodiffBackend, 2>::from_data(Data::from([[6.0, 7.0], [9.0, 10.0]]))
|
||||
.require_grad();
|
||||
let device = Default::default();
|
||||
let module = LayerNormConfig::new(2).init::<TestAutodiffBackend>(&device);
|
||||
let tensor_1 = Tensor::<TestAutodiffBackend, 2>::from_data(
|
||||
Data::from([[0.0, 1.0], [3.0, 4.0]]),
|
||||
&device,
|
||||
)
|
||||
.require_grad();
|
||||
let tensor_2 = Tensor::<TestAutodiffBackend, 2>::from_data(
|
||||
Data::from([[6.0, 7.0], [9.0, 10.0]]),
|
||||
&device,
|
||||
)
|
||||
.require_grad();
|
||||
|
||||
let x = tensor_1.clone().matmul(tensor_2.clone());
|
||||
|
||||
|
|
|
@ -145,7 +145,7 @@ pub fn generate_sinusoids<B: Backend>(
|
|||
[length, d_model].into(),
|
||||
);
|
||||
|
||||
Tensor::<B, 2>::from_data(data.convert())
|
||||
Tensor::<B, 2>::from_data_devauto(data.convert())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -165,13 +165,13 @@ mod tests {
|
|||
|
||||
// Use a tensor of zeros as input for easy verification of the output
|
||||
// The output should be the sinusoids broadcasted to the input shape
|
||||
let tensor = Tensor::zeros([batch_size, length, d_model]);
|
||||
let tensor = Tensor::zeros_devauto([batch_size, length, d_model]);
|
||||
|
||||
let output = pe.forward(tensor);
|
||||
|
||||
assert_eq!(output.shape().dims, [batch_size, length, d_model]);
|
||||
|
||||
let expected = Tensor::<TestBackend, 3>::from_floats([
|
||||
let expected = Tensor::<TestBackend, 3>::from_floats_devauto([
|
||||
[
|
||||
[0.00000, 1.00000, 0.00000, 1.00000, 0.00000, 1.00000],
|
||||
[0.84147, 0.54030, 0.04640, 0.99892, 0.00215, 1.00000],
|
||||
|
@ -192,7 +192,7 @@ mod tests {
|
|||
let sinusoids = generate_sinusoids::<TestBackend>(12, 6, 10_000);
|
||||
|
||||
// The values are taken from the pytorch reference implementation
|
||||
let expected = Tensor::<TestBackend, 2>::from_floats([
|
||||
let expected = Tensor::<TestBackend, 2>::from_floats_devauto([
|
||||
[0.00000, 1.00000, 0.00000, 1.00000, 0.00000, 1.00000],
|
||||
[0.84147, 0.54030, 0.04640, 0.99892, 0.00215, 1.00000],
|
||||
[0.90930, -0.41615, 0.09270, 0.99569, 0.00431, 0.99999],
|
||||
|
@ -214,7 +214,7 @@ mod tests {
|
|||
fn d_model_input_should_match() {
|
||||
let d_model = 8;
|
||||
let pe = PositionalEncodingConfig::new(d_model).init::<TestBackend>();
|
||||
let input = Tensor::zeros([1, 5, 10]);
|
||||
let input = Tensor::zeros_devauto([1, 5, 10]);
|
||||
let _output = pe.forward(input);
|
||||
}
|
||||
|
||||
|
@ -223,7 +223,7 @@ mod tests {
|
|||
fn input_length_should_be_less_than_max_len() {
|
||||
let d_model = 8;
|
||||
let pe = PositionalEncodingConfig::new(d_model).init::<TestBackend>();
|
||||
let input = Tensor::zeros([1, 6_000, d_model]);
|
||||
let input = Tensor::zeros_devauto([1, 6_000, d_model]);
|
||||
let _output = pe.forward(input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,13 @@ pub struct GateController<B: Backend> {
|
|||
|
||||
impl<B: Backend> GateController<B> {
|
||||
/// Initialize a new [gate_controller](GateController) module.
|
||||
pub fn new(d_input: usize, d_output: usize, bias: bool, initializer: Initializer) -> Self {
|
||||
pub fn new(
|
||||
d_input: usize,
|
||||
d_output: usize,
|
||||
bias: bool,
|
||||
initializer: Initializer,
|
||||
device: &B::Device,
|
||||
) -> Self {
|
||||
Self {
|
||||
input_transform: LinearConfig {
|
||||
d_input,
|
||||
|
@ -31,14 +37,14 @@ impl<B: Backend> GateController<B> {
|
|||
bias,
|
||||
initializer: initializer.clone(),
|
||||
}
|
||||
.init(),
|
||||
.init(device),
|
||||
hidden_transform: LinearConfig {
|
||||
d_input: d_output,
|
||||
d_output,
|
||||
bias,
|
||||
initializer,
|
||||
}
|
||||
.init(),
|
||||
.init(device),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,8 +35,14 @@ pub struct Gru<B: Backend> {
|
|||
}
|
||||
|
||||
impl GruConfig {
|
||||
/// Initialize a new [gru](Gru) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Gru<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [gru](Gru) module.
|
||||
pub fn init<B: Backend>(&self) -> Gru<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Gru<B> {
|
||||
let d_output = self.d_hidden;
|
||||
|
||||
let update_gate = gate_controller::GateController::new(
|
||||
|
@ -44,18 +50,21 @@ impl GruConfig {
|
|||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
let reset_gate = gate_controller::GateController::new(
|
||||
self.d_input,
|
||||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
let new_gate = gate_controller::GateController::new(
|
||||
self.d_input,
|
||||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
|
||||
Gru {
|
||||
|
@ -110,7 +119,10 @@ impl<B: Backend> Gru<B> {
|
|||
|
||||
let mut hidden_state = match state {
|
||||
Some(state) => state,
|
||||
None => Tensor::zeros([batch_size, seq_length, self.d_hidden]),
|
||||
None => Tensor::zeros(
|
||||
[batch_size, seq_length, self.d_hidden],
|
||||
&batched_input.device(),
|
||||
),
|
||||
};
|
||||
|
||||
for (t, (input_t, hidden_t)) in batched_input
|
||||
|
@ -209,7 +221,8 @@ mod tests {
|
|||
fn tests_forward_single_input_single_feature() {
|
||||
TestBackend::seed(0);
|
||||
let config = GruConfig::new(1, 1, false);
|
||||
let mut gru = config.init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let mut gru = config.init::<TestBackend>(&device);
|
||||
|
||||
fn create_gate_controller(
|
||||
weights: f32,
|
||||
|
@ -218,10 +231,11 @@ mod tests {
|
|||
d_output: usize,
|
||||
bias: bool,
|
||||
initializer: Initializer,
|
||||
device: &<TestBackend as Backend>::Device,
|
||||
) -> GateController<TestBackend> {
|
||||
let record = LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(Data::from([[weights]]))),
|
||||
bias: Some(Param::from(Tensor::from_data(Data::from([biases])))),
|
||||
weight: Param::from(Tensor::from_data(Data::from([[weights]]), device)),
|
||||
bias: Some(Param::from(Tensor::from_data(Data::from([biases]), device))),
|
||||
};
|
||||
gate_controller::GateController::create_with_weights(
|
||||
d_input,
|
||||
|
@ -240,6 +254,7 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierNormal { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
gru.reset_gate = create_gate_controller(
|
||||
0.6,
|
||||
|
@ -248,6 +263,7 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierNormal { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
gru.new_gate = create_gate_controller(
|
||||
0.7,
|
||||
|
@ -256,24 +272,32 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierNormal { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
|
||||
let input = Tensor::<TestBackend, 3>::from_data(Data::from([[[0.1]]]));
|
||||
let input = Tensor::<TestBackend, 3>::from_data(Data::from([[[0.1]]]), &device);
|
||||
|
||||
let state = gru.forward(input, None);
|
||||
|
||||
let output = state.select(0, Tensor::arange(0..1)).squeeze(0);
|
||||
let output = state.select(0, Tensor::arange(0..1, &device)).squeeze(0);
|
||||
|
||||
output.to_data().assert_approx_eq(&Data::from([[0.034]]), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batched_forward_pass() {
|
||||
let gru = GruConfig::new(64, 1024, true).init::<TestBackend>();
|
||||
let batched_input = Tensor::<TestBackend, 3>::random([8, 10, 64], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let gru = GruConfig::new(64, 1024, true).init::<TestBackend>(&device);
|
||||
let batched_input =
|
||||
Tensor::<TestBackend, 3>::random([8, 10, 64], Distribution::Default, &device);
|
||||
|
||||
let hidden_state = gru.forward(batched_input, None);
|
||||
|
||||
assert_eq!(hidden_state.shape().dims, [8, 10, 1024]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_initialization_on_default_device() {
|
||||
let _module = GruConfig::new(64, 1024, true).init_devauto::<TestBackend>();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,8 +36,14 @@ pub struct Lstm<B: Backend> {
|
|||
}
|
||||
|
||||
impl LstmConfig {
|
||||
/// Initialize a new [lstm](Lstm) module on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> Lstm<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [lstm](Lstm) module.
|
||||
pub fn init<B: Backend>(&self) -> Lstm<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> Lstm<B> {
|
||||
let d_output = self.d_hidden;
|
||||
|
||||
let input_gate = gate_controller::GateController::new(
|
||||
|
@ -45,24 +51,28 @@ impl LstmConfig {
|
|||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
let forget_gate = gate_controller::GateController::new(
|
||||
self.d_input,
|
||||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
let output_gate = gate_controller::GateController::new(
|
||||
self.d_input,
|
||||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
let cell_gate = gate_controller::GateController::new(
|
||||
self.d_input,
|
||||
d_output,
|
||||
self.bias,
|
||||
self.initializer.clone(),
|
||||
device,
|
||||
);
|
||||
|
||||
Lstm {
|
||||
|
@ -123,14 +133,16 @@ impl<B: Backend> Lstm<B> {
|
|||
state: Option<(Tensor<B, 2>, Tensor<B, 2>)>,
|
||||
) -> (Tensor<B, 3>, Tensor<B, 3>) {
|
||||
let [batch_size, seq_length, _] = batched_input.shape().dims;
|
||||
let mut batched_cell_state = Tensor::zeros([batch_size, seq_length, self.d_hidden]);
|
||||
let mut batched_hidden_state = Tensor::zeros([batch_size, seq_length, self.d_hidden]);
|
||||
let device = &batched_input.device();
|
||||
let mut batched_cell_state = Tensor::zeros([batch_size, seq_length, self.d_hidden], device);
|
||||
let mut batched_hidden_state =
|
||||
Tensor::zeros([batch_size, seq_length, self.d_hidden], device);
|
||||
|
||||
let (mut cell_state, mut hidden_state) = match state {
|
||||
Some((cell_state, hidden_state)) => (cell_state, hidden_state),
|
||||
None => (
|
||||
Tensor::zeros([batch_size, self.d_hidden]),
|
||||
Tensor::zeros([batch_size, self.d_hidden]),
|
||||
Tensor::zeros([batch_size, self.d_hidden], device),
|
||||
Tensor::zeros([batch_size, self.d_hidden], device),
|
||||
),
|
||||
};
|
||||
|
||||
|
@ -226,7 +238,7 @@ mod tests {
|
|||
|
||||
let config = LstmConfig::new(5, 5, false)
|
||||
.with_initializer(Initializer::Uniform { min: 0.0, max: 1.0 });
|
||||
let lstm = config.init::<TestBackend>();
|
||||
let lstm = config.init_devauto::<TestBackend>();
|
||||
|
||||
let gate_to_data =
|
||||
|gate: GateController<TestBackend>| gate.input_transform.weight.val().to_data();
|
||||
|
@ -250,7 +262,8 @@ mod tests {
|
|||
fn test_forward_single_input_single_feature() {
|
||||
TestBackend::seed(0);
|
||||
let config = LstmConfig::new(1, 1, false);
|
||||
let mut lstm = config.init::<TestBackend>();
|
||||
let device = Default::default();
|
||||
let mut lstm = config.init::<TestBackend>(&device);
|
||||
|
||||
fn create_gate_controller(
|
||||
weights: f32,
|
||||
|
@ -259,10 +272,11 @@ mod tests {
|
|||
d_output: usize,
|
||||
bias: bool,
|
||||
initializer: Initializer,
|
||||
device: &<TestBackend as Backend>::Device,
|
||||
) -> GateController<TestBackend> {
|
||||
let record = LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(Data::from([[weights]]))),
|
||||
bias: Some(Param::from(Tensor::from_data(Data::from([biases])))),
|
||||
weight: Param::from(Tensor::from_data(Data::from([[weights]]), device)),
|
||||
bias: Some(Param::from(Tensor::from_data(Data::from([biases]), device))),
|
||||
};
|
||||
gate_controller::GateController::create_with_weights(
|
||||
d_input,
|
||||
|
@ -281,6 +295,7 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierUniform { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
lstm.forget_gate = create_gate_controller(
|
||||
0.7,
|
||||
|
@ -289,6 +304,7 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierUniform { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
lstm.cell_gate = create_gate_controller(
|
||||
0.9,
|
||||
|
@ -297,6 +313,7 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierUniform { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
lstm.output_gate = create_gate_controller(
|
||||
1.1,
|
||||
|
@ -305,15 +322,18 @@ mod tests {
|
|||
1,
|
||||
false,
|
||||
Initializer::XavierUniform { gain: 1.0 },
|
||||
&device,
|
||||
);
|
||||
|
||||
// single timestep with single feature
|
||||
let input = Tensor::<TestBackend, 3>::from_data(Data::from([[[0.1]]]));
|
||||
let input = Tensor::<TestBackend, 3>::from_data(Data::from([[[0.1]]]), &device);
|
||||
|
||||
let (cell_state_batch, hidden_state_batch) = lstm.forward(input, None);
|
||||
let cell_state = cell_state_batch.select(0, Tensor::arange(0..1)).squeeze(0);
|
||||
let cell_state = cell_state_batch
|
||||
.select(0, Tensor::arange(0..1, &device))
|
||||
.squeeze(0);
|
||||
let hidden_state = hidden_state_batch
|
||||
.select(0, Tensor::arange(0..1))
|
||||
.select(0, Tensor::arange(0..1, &device))
|
||||
.squeeze(0);
|
||||
cell_state
|
||||
.to_data()
|
||||
|
@ -325,8 +345,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_batched_forward_pass() {
|
||||
let lstm = LstmConfig::new(64, 1024, true).init::<TestBackend>();
|
||||
let batched_input = Tensor::<TestBackend, 3>::random([8, 10, 64], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let lstm = LstmConfig::new(64, 1024, true).init(&device);
|
||||
let batched_input =
|
||||
Tensor::<TestBackend, 3>::random([8, 10, 64], Distribution::Default, &device);
|
||||
|
||||
let (cell_state, hidden_state) = lstm.forward(batched_input, None);
|
||||
|
||||
|
|
|
@ -61,9 +61,15 @@ pub struct TransformerDecoder<B: Backend> {
|
|||
|
||||
impl TransformerDecoderConfig {
|
||||
/// Initialize a new [Transformer Decoder](TransformerDecoder) module on an automatically selected device.
|
||||
pub fn init<B: Backend>(&self) -> TransformerDecoder<B> {
|
||||
pub fn init_devauto<B: Backend>(&self) -> TransformerDecoder<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [Transformer Decoder](TransformerDecoder) module.
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> TransformerDecoder<B> {
|
||||
let layers = (0..self.n_layers)
|
||||
.map(|_| TransformerDecoderLayer::new(self))
|
||||
.map(|_| TransformerDecoderLayer::new(self, device))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
TransformerDecoder { layers }
|
||||
|
@ -190,25 +196,25 @@ impl<B: Backend> TransformerDecoderAutoregressiveCache<B> {
|
|||
}
|
||||
|
||||
impl<B: Backend> TransformerDecoderLayer<B> {
|
||||
fn new(config: &TransformerDecoderConfig) -> Self {
|
||||
fn new(config: &TransformerDecoderConfig, device: &B::Device) -> Self {
|
||||
let self_attn = MultiHeadAttentionConfig::new(config.d_model, config.n_heads)
|
||||
.with_initializer(config.initializer.clone())
|
||||
.with_dropout(config.dropout)
|
||||
.with_quiet_softmax(config.quiet_softmax)
|
||||
.init();
|
||||
.init(device);
|
||||
|
||||
let cross_attn = MultiHeadAttentionConfig::new(config.d_model, config.n_heads)
|
||||
.with_initializer(config.initializer.clone())
|
||||
.with_dropout(config.dropout)
|
||||
.with_quiet_softmax(config.quiet_softmax)
|
||||
.init();
|
||||
let norm_1 = LayerNormConfig::new(config.d_model).init();
|
||||
let norm_2 = LayerNormConfig::new(config.d_model).init();
|
||||
let norm_3 = LayerNormConfig::new(config.d_model).init();
|
||||
.init(device);
|
||||
let norm_1 = LayerNormConfig::new(config.d_model).init(device);
|
||||
let norm_2 = LayerNormConfig::new(config.d_model).init(device);
|
||||
let norm_3 = LayerNormConfig::new(config.d_model).init(device);
|
||||
let dropout = DropoutConfig::new(config.dropout).init();
|
||||
let pwff = PositionWiseFeedForwardConfig::new(config.d_model, config.d_ff)
|
||||
.with_dropout(config.dropout)
|
||||
.init();
|
||||
.init(device);
|
||||
|
||||
Self {
|
||||
cross_attn,
|
||||
|
@ -397,6 +403,15 @@ mod tests {
|
|||
use crate::{nn::attention::generate_autoregressive_mask, TestBackend};
|
||||
use burn_tensor::Distribution;
|
||||
|
||||
#[test]
|
||||
fn test_initialization_on_default_device() {
|
||||
let [d_model, d_ff, n_heads, num_layers] = [12, 24, 2, 3];
|
||||
TestBackend::seed(0);
|
||||
let _module = TransformerDecoderConfig::new(d_model, d_ff, n_heads, num_layers)
|
||||
.with_norm_first(false)
|
||||
.init_devauto::<TestBackend>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_autoregressive_norm_last() {
|
||||
let [d_model, d_ff, n_heads, num_layers] = [12, 24, 2, 3];
|
||||
|
@ -419,16 +434,19 @@ mod tests {
|
|||
}
|
||||
|
||||
fn test_autoregressive(config: TransformerDecoderConfig) {
|
||||
let device = Default::default();
|
||||
let [batch_size, seq_length, d_model] = [3, 4, config.d_model];
|
||||
let transformer = config.init();
|
||||
let transformer = config.init(&device);
|
||||
|
||||
let memory = Tensor::<TestBackend, 3>::random(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
);
|
||||
let target = Tensor::<TestBackend, 3>::random(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
);
|
||||
let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &target.device());
|
||||
let input = TransformerDecoderInput::new(target.clone(), memory.clone())
|
||||
|
|
|
@ -91,9 +91,15 @@ impl<B: Backend> TransformerEncoderInput<B> {
|
|||
}
|
||||
impl TransformerEncoderConfig {
|
||||
/// Initialize a new [transformer encoder](TransformerEncoder) module.
|
||||
pub fn init<B: Backend>(&self) -> TransformerEncoder<B> {
|
||||
pub fn init_devauto<B: Backend>(&self) -> TransformerEncoder<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
|
||||
/// Initialize a new [transformer encoder](TransformerEncoder) module.
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> TransformerEncoder<B> {
|
||||
let layers = (0..self.n_layers)
|
||||
.map(|_| TransformerEncoderLayer::new(self))
|
||||
.map(|_| TransformerEncoderLayer::new(self, device))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
TransformerEncoder { layers }
|
||||
|
@ -202,19 +208,19 @@ impl<B: Backend> TransformerEncoderLayer<B> {
|
|||
norm_first: config.norm_first,
|
||||
}
|
||||
}
|
||||
fn new(config: &TransformerEncoderConfig) -> Self {
|
||||
fn new(config: &TransformerEncoderConfig, device: &B::Device) -> Self {
|
||||
let mha = MultiHeadAttentionConfig::new(config.d_model, config.n_heads)
|
||||
.with_initializer(config.initializer.clone())
|
||||
.with_dropout(config.dropout)
|
||||
.with_quiet_softmax(config.quiet_softmax)
|
||||
.init();
|
||||
let norm_1 = LayerNormConfig::new(config.d_model).init();
|
||||
let norm_2 = LayerNormConfig::new(config.d_model).init();
|
||||
.init(device);
|
||||
let norm_1 = LayerNormConfig::new(config.d_model).init(device);
|
||||
let norm_2 = LayerNormConfig::new(config.d_model).init(device);
|
||||
let dropout = DropoutConfig::new(config.dropout).init();
|
||||
let pwff = PositionWiseFeedForwardConfig::new(config.d_model, config.d_ff)
|
||||
.with_initializer(config.initializer.clone())
|
||||
.with_dropout(config.dropout)
|
||||
.init();
|
||||
.init(device);
|
||||
|
||||
Self {
|
||||
mha,
|
||||
|
@ -345,6 +351,13 @@ mod tests {
|
|||
use crate::{nn::attention::generate_autoregressive_mask, TestBackend};
|
||||
use burn_tensor::Distribution;
|
||||
|
||||
#[test]
|
||||
fn test_initialization_on_default_device() {
|
||||
let [d_model, d_ff, n_heads, num_layers] = [12, 24, 2, 3];
|
||||
let _module = TransformerEncoderConfig::new(d_model, d_ff, n_heads, num_layers)
|
||||
.init_devauto::<TestBackend>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_autoregressive_norm_last() {
|
||||
let [d_model, d_ff, n_heads, num_layers] = [12, 24, 2, 3];
|
||||
|
@ -364,11 +377,13 @@ mod tests {
|
|||
|
||||
fn test_autoregressive(config: TransformerEncoderConfig) {
|
||||
let [batch_size, seq_length, d_model] = [3, 4, config.d_model];
|
||||
let transformer = config.init();
|
||||
let device = Default::default();
|
||||
let transformer = config.init(&device);
|
||||
|
||||
let tensor = Tensor::<TestBackend, 3>::random(
|
||||
[batch_size, seq_length, d_model],
|
||||
Distribution::Default,
|
||||
&device,
|
||||
);
|
||||
let mask_attn = generate_autoregressive_mask(batch_size, seq_length, &tensor.device());
|
||||
let input = TransformerEncoderInput::new(tensor.clone()).mask_attn(mask_attn);
|
||||
|
|
|
@ -40,15 +40,21 @@ pub struct PositionWiseFeedForward<B: Backend> {
|
|||
}
|
||||
|
||||
impl PositionWiseFeedForwardConfig {
|
||||
/// Initialize a new [position-wise feed-forward](PositionWiseFeedForward) module
|
||||
/// on an automatically selected device.
|
||||
pub fn init_devauto<B: Backend>(&self) -> PositionWiseFeedForward<B> {
|
||||
let device = B::Device::default();
|
||||
self.init(&device)
|
||||
}
|
||||
/// Initialize a new [position-wise feed-forward](PositionWiseFeedForward) module.
|
||||
pub fn init<B: Backend>(&self) -> PositionWiseFeedForward<B> {
|
||||
pub fn init<B: Backend>(&self, device: &B::Device) -> PositionWiseFeedForward<B> {
|
||||
PositionWiseFeedForward {
|
||||
linear_inner: LinearConfig::new(self.d_model, self.d_ff)
|
||||
.with_initializer(self.initializer.clone())
|
||||
.init(),
|
||||
.init(device),
|
||||
linear_outer: LinearConfig::new(self.d_ff, self.d_model)
|
||||
.with_initializer(self.initializer.clone())
|
||||
.init(),
|
||||
.init(device),
|
||||
dropout: DropoutConfig::new(self.dropout).init(),
|
||||
gelu: GELU::new(),
|
||||
}
|
||||
|
|
|
@ -162,8 +162,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_adagrad_optimizer_save_load_state() {
|
||||
let linear = nn::LinearConfig::new(6, 6).init();
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let linear = nn::LinearConfig::new(6, 6).init(&device);
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default, &device);
|
||||
let mut optimizer = create_adagrad();
|
||||
let grads = linear.forward(x).backward();
|
||||
let grads = GradientsParams::from_grads(grads, &linear);
|
||||
|
@ -198,12 +199,12 @@ mod tests {
|
|||
]),
|
||||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
let x_1 = Tensor::from_floats([
|
||||
let x_1 = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
])
|
||||
.require_grad();
|
||||
let x_2 = Tensor::from_floats([
|
||||
let x_2 = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -257,8 +258,8 @@ mod tests {
|
|||
bias: Data<f32, 1>,
|
||||
) -> nn::Linear<TestAutodiffBackend> {
|
||||
let record = nn::LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data(bias))),
|
||||
weight: Param::from(Tensor::from_data_devauto(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data_devauto(bias))),
|
||||
};
|
||||
|
||||
nn::LinearConfig::new(6, 6).init_with(record)
|
||||
|
|
|
@ -195,8 +195,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_adam_optimizer_save_load_state() {
|
||||
let linear = nn::LinearConfig::new(6, 6).init();
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let linear = nn::LinearConfig::new(6, 6).init(&device);
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default, &device);
|
||||
let mut optimizer = create_adam();
|
||||
let grads = linear.forward(x).backward();
|
||||
let grads = GradientsParams::from_grads(grads, &linear);
|
||||
|
@ -231,12 +232,12 @@ mod tests {
|
|||
]),
|
||||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
let x_1 = Tensor::from_floats([
|
||||
let x_1 = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
])
|
||||
.require_grad();
|
||||
let x_2 = Tensor::from_floats([
|
||||
let x_2 = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -301,7 +302,7 @@ mod tests {
|
|||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
|
||||
let x = Tensor::from_floats([
|
||||
let x = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -331,8 +332,8 @@ mod tests {
|
|||
bias: Data<f32, 1>,
|
||||
) -> nn::Linear<TestAutodiffBackend> {
|
||||
let record = nn::LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data(bias))),
|
||||
weight: Param::from(Tensor::from_data_devauto(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data_devauto(bias))),
|
||||
};
|
||||
|
||||
nn::LinearConfig::new(6, 6).init_with(record)
|
||||
|
|
|
@ -208,8 +208,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_adamw_optimizer_save_load_state() {
|
||||
let linear = nn::LinearConfig::new(6, 6).init();
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let linear = nn::LinearConfig::new(6, 6).init(&device);
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default, &device);
|
||||
let mut optimizer = create_adamw();
|
||||
let grads = linear.forward(x).backward();
|
||||
let grads = GradientsParams::from_grads(grads, &linear);
|
||||
|
@ -243,12 +244,12 @@ mod tests {
|
|||
]),
|
||||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
let x_1 = Tensor::from_floats([
|
||||
let x_1 = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
])
|
||||
.require_grad();
|
||||
let x_2 = Tensor::from_floats([
|
||||
let x_2 = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -313,7 +314,7 @@ mod tests {
|
|||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
|
||||
let x = Tensor::from_floats([
|
||||
let x = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -343,8 +344,8 @@ mod tests {
|
|||
bias: Data<f32, 1>,
|
||||
) -> nn::Linear<TestAutodiffBackend> {
|
||||
let record = nn::LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data(bias))),
|
||||
weight: Param::from(Tensor::from_data_devauto(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data_devauto(bias))),
|
||||
};
|
||||
|
||||
nn::LinearConfig::new(6, 6).init_with(record)
|
||||
|
|
|
@ -81,13 +81,14 @@ mod tests {
|
|||
nn::{Linear, LinearConfig},
|
||||
TestAutodiffBackend,
|
||||
};
|
||||
use burn_tensor::Distribution;
|
||||
use burn_tensor::{backend::Backend, Distribution};
|
||||
|
||||
#[test]
|
||||
fn test_accumulate_gradients_one_step() {
|
||||
let device = Default::default();
|
||||
let mut accumulator = GradientsAccumulator::new();
|
||||
let layer = layer();
|
||||
let loss = layer.forward(random_tensor());
|
||||
let layer = layer::<TestAutodiffBackend>(&device);
|
||||
let loss = layer.forward(random_tensor::<TestAutodiffBackend>(&device));
|
||||
let grads = GradientsParams::from_grads(loss.backward(), &layer);
|
||||
|
||||
accumulator.accumulate(&layer, grads);
|
||||
|
@ -98,10 +99,11 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_accumulate_gradients_two_steps() {
|
||||
let device = Default::default();
|
||||
let mut accumulator = GradientsAccumulator::new();
|
||||
let layer = layer();
|
||||
let loss_1 = layer.forward(random_tensor());
|
||||
let loss_2 = layer.forward(random_tensor());
|
||||
let layer = layer::<TestAutodiffBackend>(&device);
|
||||
let loss_1 = layer.forward(random_tensor(&device));
|
||||
let loss_2 = layer.forward(random_tensor(&device));
|
||||
let grads_1 = GradientsParams::from_grads(loss_1.backward(), &layer);
|
||||
let grads_2 = GradientsParams::from_grads(loss_2.backward(), &layer);
|
||||
|
||||
|
@ -112,11 +114,11 @@ mod tests {
|
|||
assert_eq!(grads.len(), 2)
|
||||
}
|
||||
|
||||
fn layer() -> Linear<TestAutodiffBackend> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init()
|
||||
fn layer<B: Backend>(device: &B::Device) -> Linear<B> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init(device)
|
||||
}
|
||||
|
||||
fn random_tensor() -> Tensor<TestAutodiffBackend, 2> {
|
||||
Tensor::<TestAutodiffBackend, 2>::random([2, 20], Distribution::Default)
|
||||
fn random_tensor<B: Backend>(device: &B::Device) -> Tensor<B, 2> {
|
||||
Tensor::<B, 2>::random([2, 20], Distribution::Default, device)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,11 +99,12 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_convert_grads() {
|
||||
let layer_1 = layer();
|
||||
let device = Default::default();
|
||||
let layer_1 = layer::<TestAutodiffBackend>(&device);
|
||||
let mut layer_2 = layer_1.clone();
|
||||
layer_2 = layer_2.fork(&<TestAutodiffBackend as Backend>::Device::default());
|
||||
let loss_1 = layer_1.forward(random_tensor());
|
||||
let loss_2 = layer_2.forward(random_tensor());
|
||||
layer_2 = layer_2.fork(&device);
|
||||
let loss_1 = layer_1.forward(random_tensor(&device));
|
||||
let loss_2 = layer_2.forward(random_tensor(&device));
|
||||
let grads_1 = GradientsParams::from_grads(loss_1.backward(), &layer_1);
|
||||
let grads_2 = GradientsParams::from_grads(loss_2.backward(), &layer_2);
|
||||
|
||||
|
@ -115,11 +116,11 @@ mod tests {
|
|||
assert_eq!(grads_2.len(), param_ids_2.len());
|
||||
}
|
||||
|
||||
fn layer() -> Linear<TestAutodiffBackend> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init()
|
||||
fn layer<B: Backend>(device: &B::Device) -> Linear<B> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init(device)
|
||||
}
|
||||
|
||||
fn random_tensor() -> Tensor<TestAutodiffBackend, 2> {
|
||||
Tensor::<TestAutodiffBackend, 2>::random([2, 20], Distribution::Default)
|
||||
fn random_tensor<B: Backend>(device: &B::Device) -> Tensor<B, 2> {
|
||||
Tensor::<B, 2>::random([2, 20], Distribution::Default, device)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -317,7 +317,7 @@ mod tests {
|
|||
use crate::optim::{GradientsParams, Optimizer};
|
||||
use crate::record::{BinFileRecorder, FullPrecisionSettings, Recorder};
|
||||
use crate::tensor::{Data, Distribution, Tensor};
|
||||
use crate::{nn, nn::Linear, TestAutodiffBackend, TestBackend};
|
||||
use crate::{nn, TestAutodiffBackend, TestBackend};
|
||||
use tempfile::TempDir;
|
||||
|
||||
const LEARNING_RATE: LearningRate = 0.01;
|
||||
|
@ -325,8 +325,9 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_rmsprop_optimizer_save_load_state() {
|
||||
let linear = nn::LinearConfig::new(6, 6).init();
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default);
|
||||
let device = Default::default();
|
||||
let linear = nn::LinearConfig::new(6, 6).init(&device);
|
||||
let x = Tensor::<TestAutodiffBackend, 2>::random([2, 6], Distribution::Default, &device);
|
||||
let mut optimizer = create_rmsprop();
|
||||
let grads = linear.forward(x).backward();
|
||||
let grads = GradientsParams::from_grads(grads, &linear);
|
||||
|
@ -359,12 +360,12 @@ mod tests {
|
|||
]),
|
||||
Data::from([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]),
|
||||
);
|
||||
let x_1 = Tensor::from_floats([
|
||||
let x_1 = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
])
|
||||
.require_grad();
|
||||
let x_2 = Tensor::from_floats([
|
||||
let x_2 = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -427,12 +428,12 @@ mod tests {
|
|||
]),
|
||||
Data::from([-0.3905, 0.0884, -0.0970, 0.1176, 0.1366, 0.0130]),
|
||||
);
|
||||
let x_1 = Tensor::from_floats([
|
||||
let x_1 = Tensor::from_floats_devauto([
|
||||
[0.6294, 0.0940, 0.8176, 0.8824, 0.5228, 0.4310],
|
||||
[0.7152, 0.9559, 0.7893, 0.5684, 0.5939, 0.8883],
|
||||
])
|
||||
.require_grad();
|
||||
let x_2 = Tensor::from_floats([
|
||||
let x_2 = Tensor::from_floats_devauto([
|
||||
[0.8491, 0.2108, 0.8939, 0.4433, 0.5527, 0.2528],
|
||||
[0.3270, 0.0412, 0.5538, 0.9605, 0.3195, 0.9085],
|
||||
])
|
||||
|
@ -496,8 +497,8 @@ mod tests {
|
|||
bias: Data<f32, 1>,
|
||||
) -> nn::Linear<TestAutodiffBackend> {
|
||||
let record = nn::LinearRecord {
|
||||
weight: Param::from(Tensor::from_data(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data(bias))),
|
||||
weight: Param::from(Tensor::from_data_devauto(weight)),
|
||||
bias: Some(Param::from(Tensor::from_data_devauto(bias))),
|
||||
};
|
||||
|
||||
nn::LinearConfig::new(6, 6).init_with(record)
|
||||
|
@ -505,11 +506,11 @@ mod tests {
|
|||
|
||||
#[allow(dead_code)]
|
||||
fn create_random_tensor() -> Tensor<TestAutodiffBackend, 2> {
|
||||
Tensor::<TestAutodiffBackend, 2>::random(Shape::new([2, 20]), Distribution::Default)
|
||||
Tensor::<TestAutodiffBackend, 2>::random_devauto(Shape::new([2, 20]), Distribution::Default)
|
||||
}
|
||||
|
||||
fn create_rmsprop(
|
||||
) -> OptimizerAdaptor<RMSProp<TestBackend>, Linear<TestAutodiffBackend>, TestAutodiffBackend>
|
||||
) -> OptimizerAdaptor<RMSProp<TestBackend>, nn::Linear<TestAutodiffBackend>, TestAutodiffBackend>
|
||||
{
|
||||
RMSPropConfig {
|
||||
alpha: 0.99,
|
||||
|
|
|
@ -108,9 +108,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn with_updated_params_should_have_state() {
|
||||
let layer = layer();
|
||||
let device = Default::default();
|
||||
let layer = layer::<TestAutodiffBackend>(&device);
|
||||
let mut optim = sgd_with_all();
|
||||
let loss = layer.forward(random_tensor());
|
||||
let loss = layer.forward(random_tensor::<TestAutodiffBackend>(&device));
|
||||
let grads = loss.backward();
|
||||
let grads = GradientsParams::from_grads(grads, &layer);
|
||||
let _layer = optim.step(LEARNING_RATE, layer, grads);
|
||||
|
@ -135,9 +136,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn should_load_state() {
|
||||
let layer = layer();
|
||||
let device = Default::default();
|
||||
let layer = layer::<TestAutodiffBackend>(&device);
|
||||
let mut optim = sgd_with_all();
|
||||
let loss = layer.forward(random_tensor());
|
||||
let loss = layer.forward(random_tensor(&device));
|
||||
let grads = loss.backward();
|
||||
let grads = GradientsParams::from_grads(grads, &layer);
|
||||
let _layer = optim.step(LEARNING_RATE, layer, grads);
|
||||
|
@ -152,12 +154,12 @@ mod tests {
|
|||
assert_eq!(record.len(), state_restored.len());
|
||||
}
|
||||
|
||||
fn random_tensor() -> Tensor<TestAutodiffBackend, 2> {
|
||||
Tensor::<TestAutodiffBackend, 2>::random(Shape::new([2, 20]), Distribution::Default)
|
||||
fn random_tensor<B: Backend>(device: &B::Device) -> Tensor<B, 2> {
|
||||
Tensor::<B, 2>::random(Shape::new([2, 20]), Distribution::Default, device)
|
||||
}
|
||||
|
||||
fn layer() -> Linear<TestAutodiffBackend> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init()
|
||||
fn layer<B: Backend>(device: &B::Device) -> Linear<B> {
|
||||
LinearConfig::new(20, 20).with_bias(true).init(device)
|
||||
}
|
||||
|
||||
fn sgd_with_all(
|
||||
|
|
|
@ -347,12 +347,13 @@ mod tests {
|
|||
}
|
||||
|
||||
fn test_can_save_and_load<Recorder: FileRecorder>(recorder: Recorder) {
|
||||
let model_before = create_model();
|
||||
let device = Default::default();
|
||||
let model_before = create_model(&device);
|
||||
recorder
|
||||
.record(model_before.clone().into_record(), file_path())
|
||||
.unwrap();
|
||||
|
||||
let model_after = create_model().load_record(recorder.load(file_path()).unwrap());
|
||||
let model_after = create_model(&device).load_record(recorder.load(file_path()).unwrap());
|
||||
|
||||
let byte_recorder = BinBytesRecorder::<FullPrecisionSettings>::default();
|
||||
let model_bytes_before = byte_recorder
|
||||
|
@ -370,10 +371,10 @@ mod tests {
|
|||
phantom: core::marker::PhantomData<B>,
|
||||
}
|
||||
|
||||
pub fn create_model() -> Model<TestBackend> {
|
||||
let conv2d1 = Conv2dConfig::new([1, 8], [3, 3]).init();
|
||||
pub fn create_model(device: &<TestBackend as Backend>::Device) -> Model<TestBackend> {
|
||||
let conv2d1 = Conv2dConfig::new([1, 8], [3, 3]).init(device);
|
||||
|
||||
let linear1 = LinearConfig::new(32, 32).with_bias(true).init();
|
||||
let linear1 = LinearConfig::new(32, 32).with_bias(true).init(device);
|
||||
|
||||
Model {
|
||||
conv2d1,
|
||||
|
|
|
@ -72,7 +72,9 @@ impl<S: PrecisionSettings> Recorder for NamedMpkBytesRecorder<S> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{module::Module, nn, record::FullPrecisionSettings, TestBackend};
|
||||
use crate::{
|
||||
module::Module, nn, record::FullPrecisionSettings, tensor::backend::Backend, TestBackend,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_can_save_and_load_bin_format() {
|
||||
|
@ -86,8 +88,9 @@ mod tests {
|
|||
}
|
||||
|
||||
fn test_can_save_and_load<Recorder: BytesRecorder>(recorder: Recorder) {
|
||||
let model1 = create_model();
|
||||
let model2 = create_model();
|
||||
let device = Default::default();
|
||||
let model1 = create_model::<TestBackend>(&device);
|
||||
let model2 = create_model::<TestBackend>(&device);
|
||||
let bytes1 = recorder.record(model1.into_record(), ()).unwrap();
|
||||
let bytes2 = recorder.record(model2.clone().into_record(), ()).unwrap();
|
||||
|
||||
|
@ -98,7 +101,7 @@ mod tests {
|
|||
assert_eq!(bytes1, bytes2_after);
|
||||
}
|
||||
|
||||
pub fn create_model() -> nn::Linear<TestBackend> {
|
||||
nn::LinearConfig::new(32, 32).with_bias(true).init()
|
||||
pub fn create_model<B: Backend>(device: &B::Device) -> nn::Linear<B> {
|
||||
nn::LinearConfig::new(32, 32).with_bias(true).init(device)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,7 +97,7 @@ impl<B: Backend, const D: usize> Record for Tensor<B, D> {
|
|||
}
|
||||
|
||||
fn from_item<S: PrecisionSettings>(item: Self::Item<S>) -> Self {
|
||||
Tensor::from_data(item.data.convert::<B::FloatElem>())
|
||||
Tensor::from_data_devauto(item.data.convert::<B::FloatElem>())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,7 +113,7 @@ impl<B: Backend, const D: usize> Record for Tensor<B, D, Int> {
|
|||
}
|
||||
|
||||
fn from_item<S: PrecisionSettings>(item: Self::Item<S>) -> Self {
|
||||
Tensor::from_data(item.data.convert())
|
||||
Tensor::from_data_devauto(item.data.convert())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -129,6 +129,6 @@ impl<B: Backend, const D: usize> Record for Tensor<B, D, Bool> {
|
|||
}
|
||||
|
||||
fn from_item<S: PrecisionSettings>(item: Self::Item<S>) -> Self {
|
||||
Tensor::from_data(item.data)
|
||||
Tensor::from_data_devauto(item.data)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,8 +18,8 @@ struct ModuleTensorConstInt<B: Backend> {
|
|||
}
|
||||
|
||||
impl<B: Backend> ModuleBasic<B> {
|
||||
fn new() -> Self {
|
||||
let weight_basic = Tensor::random(Shape::new([20, 20]), Distribution::Default);
|
||||
fn new(device: &B::Device) -> Self {
|
||||
let weight_basic = Tensor::random(Shape::new([20, 20]), Distribution::Default, device);
|
||||
Self {
|
||||
weight_basic: Param::from(weight_basic),
|
||||
}
|
||||
|
@ -33,11 +33,11 @@ pub struct ModuleComposed<B: Backend> {
|
|||
}
|
||||
|
||||
impl<B: Backend> ModuleComposed<B> {
|
||||
fn new() -> Self {
|
||||
let weight = Tensor::random(Shape::new([20, 20]), Distribution::Default);
|
||||
fn new(device: &B::Device) -> Self {
|
||||
let weight = Tensor::random(Shape::new([20, 20]), Distribution::Default, device);
|
||||
Self {
|
||||
weight: Param::from(weight),
|
||||
basic: ModuleBasic::new(),
|
||||
basic: ModuleBasic::new(device),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -47,8 +47,9 @@ mod state {
|
|||
|
||||
#[test]
|
||||
fn should_load_from_record_basic() {
|
||||
let module_1 = ModuleBasic::<TestBackend>::new();
|
||||
let mut module_2 = ModuleBasic::<TestBackend>::new();
|
||||
let device = <TestBackend as Backend>::Device::default();
|
||||
let module_1 = ModuleBasic::<TestBackend>::new(&device);
|
||||
let mut module_2 = ModuleBasic::<TestBackend>::new(&device);
|
||||
let state_1 = module_1.clone().into_record();
|
||||
|
||||
assert_ne!(
|
||||
|
@ -66,8 +67,9 @@ mod state {
|
|||
|
||||
#[test]
|
||||
fn should_load_from_record_compose() {
|
||||
let module_1 = ModuleComposed::<TestBackend>::new();
|
||||
let mut module_2 = ModuleComposed::<TestBackend>::new();
|
||||
let device = <TestBackend as Backend>::Device::default();
|
||||
let module_1 = ModuleComposed::<TestBackend>::new(&device);
|
||||
let mut module_2 = ModuleComposed::<TestBackend>::new(&device);
|
||||
assert_ne!(module_1.weight.to_data(), module_2.weight.to_data());
|
||||
assert_ne!(
|
||||
module_1.basic.weight_basic.to_data(),
|
||||
|
@ -90,13 +92,15 @@ mod num_params {
|
|||
|
||||
#[test]
|
||||
fn should_calculate_num_params_basic() {
|
||||
let module = ModuleBasic::<TestBackend>::new();
|
||||
let device = <TestBackend as Backend>::Device::default();
|
||||
let module = ModuleBasic::<TestBackend>::new(&device);
|
||||
assert_eq!(20 * 20, module.num_params());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_output_state_composed() {
|
||||
let module = ModuleComposed::<TestBackend>::new();
|
||||
let device = <TestBackend as Backend>::Device::default();
|
||||
let module = ModuleComposed::<TestBackend>::new(&device);
|
||||
assert_eq!(2 * 20 * 20, module.num_params());
|
||||
}
|
||||
}
|
||||
|
@ -109,7 +113,8 @@ mod require_grad {
|
|||
|
||||
#[test]
|
||||
fn should_have_grad_by_default() {
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new();
|
||||
let device = <TestBackend as Backend>::Device::default();
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new(&device);
|
||||
let mut grads = calculate_grads(&module);
|
||||
|
||||
let grad_x = module.weight_basic.grad_remove(&mut grads);
|
||||
|
@ -119,7 +124,8 @@ mod require_grad {
|
|||
|
||||
#[test]
|
||||
fn should_have_no_grad_after_no_grad() {
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new().no_grad();
|
||||
let device = <TestAutodiffBackend as Backend>::Device::default();
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new(&device).no_grad();
|
||||
let mut grads = calculate_grads(&module);
|
||||
|
||||
let grad_x = module.weight_basic.grad_remove(&mut grads);
|
||||
|
@ -129,7 +135,8 @@ mod require_grad {
|
|||
|
||||
#[test]
|
||||
fn should_have_grad_when_from_record() {
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new();
|
||||
let device = <TestAutodiffBackend as Backend>::Device::default();
|
||||
let module = ModuleBasic::<TestAutodiffBackend>::new(&device);
|
||||
let record = ModuleBasicRecord {
|
||||
weight_basic: module.weight_basic.clone(), // Even when param is no_grad,
|
||||
};
|
||||
|
@ -144,7 +151,7 @@ mod require_grad {
|
|||
fn calculate_grads(
|
||||
module: &ModuleBasic<TestAutodiffBackend>,
|
||||
) -> <TestAutodiffBackend as AutodiffBackend>::Gradients {
|
||||
let x = Tensor::ones([20, 20]).require_grad();
|
||||
let x = Tensor::ones_devauto([20, 20]).require_grad();
|
||||
let y = module.weight_basic.val().matmul(x);
|
||||
|
||||
y.backward()
|
||||
|
|
|
@ -184,12 +184,13 @@ mod tests {
|
|||
where
|
||||
R: FileRecorder,
|
||||
{
|
||||
let device = Default::default();
|
||||
let file_path: PathBuf = file_path(format!("deserialize_with_new_optional_field-{name}"));
|
||||
let model = Model {
|
||||
single_const: 32.0,
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
array_const: [2, 2],
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
};
|
||||
|
||||
recorder
|
||||
|
@ -209,13 +210,14 @@ mod tests {
|
|||
where
|
||||
R: FileRecorder,
|
||||
{
|
||||
let device = Default::default();
|
||||
let file_path: PathBuf =
|
||||
file_path(format!("deserialize_with_removed_optional_field-{name}"));
|
||||
let model = ModelNewOptionalField {
|
||||
single_const: 32.0,
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
array_const: [2, 2],
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
new_field: None,
|
||||
};
|
||||
|
||||
|
@ -233,12 +235,13 @@ mod tests {
|
|||
where
|
||||
R: FileRecorder,
|
||||
{
|
||||
let device = Default::default();
|
||||
let file_path: PathBuf = file_path(format!("deserialize_with_new_constant_field-{name}"));
|
||||
let model = Model {
|
||||
single_const: 32.0,
|
||||
array_const: [2, 2],
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
};
|
||||
|
||||
recorder
|
||||
|
@ -258,13 +261,14 @@ mod tests {
|
|||
where
|
||||
R: FileRecorder,
|
||||
{
|
||||
let device = Default::default();
|
||||
let file_path: PathBuf =
|
||||
file_path(format!("deserialize_with_removed_constant_field-{name}"));
|
||||
let model = ModelNewConstantField {
|
||||
single_const: 32.0,
|
||||
array_const: [2, 2],
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
new_field: 0,
|
||||
};
|
||||
|
||||
|
@ -282,12 +286,13 @@ mod tests {
|
|||
where
|
||||
R: FileRecorder,
|
||||
{
|
||||
let device = Default::default();
|
||||
let file_path: PathBuf = file_path(format!("deserialize_with_new_field_order-{name}"));
|
||||
let model = Model {
|
||||
array_const: [2, 2],
|
||||
single_const: 32.0,
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(),
|
||||
linear1: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
linear2: nn::LinearConfig::new(20, 20).init::<TestBackend>(&device),
|
||||
};
|
||||
|
||||
recorder
|
||||
|
|
|
@ -70,7 +70,7 @@ mod tests {
|
|||
let model: add::Model<Backend> = add::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats_devauto([[[[1., 2., 3., 4.]]]]);
|
||||
let scalar = 2f64;
|
||||
let output = model.forward(input, scalar);
|
||||
let expected = Data::from([[[[9., 10., 11., 12.]]]]);
|
||||
|
@ -84,7 +84,7 @@ mod tests {
|
|||
let model: add_int::Model<Backend> = add_int::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4, Int>::from_ints([[[[1, 2, 3, 4]]]]);
|
||||
let input = Tensor::<Backend, 4, Int>::from_ints_devauto([[[[1, 2, 3, 4]]]]);
|
||||
let scalar = 2;
|
||||
let output = model.forward(input, scalar);
|
||||
let expected = Data::from([[[[9, 11, 13, 15]]]]);
|
||||
|
@ -98,7 +98,7 @@ mod tests {
|
|||
let model: sub::Model<Backend> = sub::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats_devauto([[[[1., 2., 3., 4.]]]]);
|
||||
let scalar = 3.0f64;
|
||||
let output = model.forward(input, scalar);
|
||||
let expected = Data::from([[[[6., 7., 8., 9.]]]]);
|
||||
|
@ -112,7 +112,7 @@ mod tests {
|
|||
let model: sub_int::Model<Backend> = sub_int::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4, Int>::from_ints([[[[1, 2, 3, 4]]]]);
|
||||
let input = Tensor::<Backend, 4, Int>::from_ints_devauto([[[[1, 2, 3, 4]]]]);
|
||||
let scalar = 3;
|
||||
let output = model.forward(input, scalar);
|
||||
let expected = Data::from([[[[6, 6, 6, 6]]]]);
|
||||
|
@ -125,7 +125,7 @@ mod tests {
|
|||
let model: mul::Model<Backend> = mul::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats_devauto([[[[1., 2., 3., 4.]]]]);
|
||||
let scalar = 6.0f64;
|
||||
let output = model.forward(input, scalar);
|
||||
let expected = Data::from([[[[126., 252., 378., 504.]]]]);
|
||||
|
@ -136,10 +136,11 @@ mod tests {
|
|||
#[test]
|
||||
fn div_tensor_by_scalar_and_tensor_by_tensor() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: div::Model<Backend> = div::Model::new();
|
||||
let device = Default::default();
|
||||
let model: div::Model<Backend> = div::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[3., 6., 6., 9.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[3., 6., 6., 9.]]]], &device);
|
||||
let scalar1 = 9.0f64;
|
||||
let scalar2 = 3.0f64;
|
||||
let output = model.forward(input, scalar1, scalar2);
|
||||
|
@ -151,10 +152,11 @@ mod tests {
|
|||
#[test]
|
||||
fn concat_tensors() {
|
||||
// Initialize the model
|
||||
let model: concat::Model<Backend> = concat::Model::new();
|
||||
let device = Default::default();
|
||||
let model: concat::Model<Backend> = concat::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::zeros([1, 2, 3, 5]);
|
||||
let input = Tensor::<Backend, 4>::zeros([1, 2, 3, 5], &device);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -169,7 +171,7 @@ mod tests {
|
|||
let model: conv1d::Model<Backend> = conv1d::Model::default();
|
||||
|
||||
// Run the model with pi as input for easier testing
|
||||
let input = Tensor::<Backend, 3>::full([6, 4, 10], consts::PI);
|
||||
let input = Tensor::<Backend, 3>::full_devauto([6, 4, 10], consts::PI);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -190,7 +192,7 @@ mod tests {
|
|||
let model: conv2d::Model<Backend> = conv2d::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input = Tensor::<Backend, 4>::ones([2, 4, 10, 15]);
|
||||
let input = Tensor::<Backend, 4>::ones_devauto([2, 4, 10, 15]);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -211,7 +213,7 @@ mod tests {
|
|||
let model: dropout_opset16::Model<Backend> = dropout_opset16::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input = Tensor::<Backend, 4>::ones([2, 4, 10, 15]);
|
||||
let input = Tensor::<Backend, 4>::ones_devauto([2, 4, 10, 15]);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -230,7 +232,7 @@ mod tests {
|
|||
let model: dropout_opset7::Model<Backend> = dropout_opset7::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input = Tensor::<Backend, 4>::ones([2, 4, 10, 15]);
|
||||
let input = Tensor::<Backend, 4>::ones_devauto([2, 4, 10, 15]);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -248,9 +250,10 @@ mod tests {
|
|||
fn erf() {
|
||||
let model: erf::Model<Backend> = erf::Model::default();
|
||||
|
||||
let input = Tensor::<Backend, 4>::from_data([[[[1.0, 2.0, 3.0, 4.0]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_data_devauto([[[[1.0, 2.0, 3.0, 4.0]]]]);
|
||||
let output = model.forward(input);
|
||||
let expected = Tensor::<Backend, 4>::from_data([[[[0.8427, 0.9953, 1.0000, 1.0000]]]]);
|
||||
let expected =
|
||||
Tensor::<Backend, 4>::from_data_devauto([[[[0.8427, 0.9953, 1.0000, 1.0000]]]]);
|
||||
|
||||
output.to_data().assert_approx_eq(&expected.to_data(), 4);
|
||||
}
|
||||
|
@ -261,8 +264,8 @@ mod tests {
|
|||
let model: gather::Model<Backend> = gather::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([[1., 2.], [3., 4.]]);
|
||||
let index = Tensor::<Backend, 2, Int>::from_ints([[0, 0], [1, 0]]);
|
||||
let input = Tensor::<Backend, 2>::from_floats_devauto([[1., 2.], [3., 4.]]);
|
||||
let index = Tensor::<Backend, 2, Int>::from_ints_devauto([[0, 0], [1, 0]]);
|
||||
let output = model.forward(input, index);
|
||||
let expected = Data::from([[1., 1.], [4., 3.]]);
|
||||
|
||||
|
@ -275,8 +278,8 @@ mod tests {
|
|||
let model: global_avr_pool::Model<Backend> = global_avr_pool::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input_1d = Tensor::<Backend, 3>::ones([2, 4, 10]);
|
||||
let input_2d = Tensor::<Backend, 4>::ones([3, 10, 3, 15]);
|
||||
let input_1d = Tensor::<Backend, 3>::ones_devauto([2, 4, 10]);
|
||||
let input_2d = Tensor::<Backend, 4>::ones_devauto([3, 10, 3, 15]);
|
||||
|
||||
let (output_1d, output_2d) = model.forward(input_1d, input_2d);
|
||||
|
||||
|
@ -298,13 +301,17 @@ mod tests {
|
|||
#[test]
|
||||
fn softmax() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: softmax::Model<Backend> = softmax::Model::new();
|
||||
let device = Default::default();
|
||||
let model: softmax::Model<Backend> = softmax::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
]);
|
||||
let input = Tensor::<Backend, 2>::from_floats(
|
||||
[
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([
|
||||
[0.36830685, 0.29917702, 0.33251613],
|
||||
|
@ -317,13 +324,17 @@ mod tests {
|
|||
#[test]
|
||||
fn log_softmax() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: log_softmax::Model<Backend> = log_softmax::Model::new();
|
||||
let device = Default::default();
|
||||
let model: log_softmax::Model<Backend> = log_softmax::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
]);
|
||||
let input = Tensor::<Backend, 2>::from_floats(
|
||||
[
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([
|
||||
[-0.998_838_9, -1.206_719_9, -1.101_067],
|
||||
|
@ -335,9 +346,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn sqrt() {
|
||||
let model: sqrt::Model<Backend> = sqrt::Model::new();
|
||||
let device = Default::default();
|
||||
let model: sqrt::Model<Backend> = sqrt::Model::new(&device);
|
||||
|
||||
let input1 = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input1 = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]], &device);
|
||||
let input2 = 36f64;
|
||||
|
||||
let (output1, output2) = model.forward(input1, input2);
|
||||
|
@ -351,16 +363,20 @@ mod tests {
|
|||
#[test]
|
||||
fn maxpool2d() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: maxpool2d::Model<Backend> = maxpool2d::Model::new();
|
||||
let device = Default::default();
|
||||
let model: maxpool2d::Model<Backend> = maxpool2d::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[
|
||||
[1.927, 1.487, 0.901, -2.106, 0.678],
|
||||
[-1.235, -0.043, -1.605, -0.752, -0.687],
|
||||
[-0.493, 0.241, -1.111, 0.092, -2.317],
|
||||
[-0.217, -1.385, -0.396, 0.803, -0.622],
|
||||
[-0.592, -0.063, -0.829, 0.331, -1.558],
|
||||
]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats(
|
||||
[[[
|
||||
[1.927, 1.487, 0.901, -2.106, 0.678],
|
||||
[-1.235, -0.043, -1.605, -0.752, -0.687],
|
||||
[-0.493, 0.241, -1.111, 0.092, -2.317],
|
||||
[-0.217, -1.385, -0.396, 0.803, -0.622],
|
||||
[-0.592, -0.063, -0.829, 0.331, -1.558],
|
||||
]]],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[[
|
||||
[0.901, 1.927, 1.487, 0.901],
|
||||
|
@ -374,16 +390,20 @@ mod tests {
|
|||
#[test]
|
||||
fn avg_pool2d() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: avg_pool2d::Model<Backend> = avg_pool2d::Model::new();
|
||||
let device = Default::default();
|
||||
let model: avg_pool2d::Model<Backend> = avg_pool2d::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[
|
||||
[-0.077, 0.360, -0.782, 0.072, 0.665],
|
||||
[-0.287, 1.621, -1.597, -0.052, 0.611],
|
||||
[0.760, -0.034, -0.345, 0.494, -0.078],
|
||||
[-1.805, -0.476, 0.205, 0.338, 1.353],
|
||||
[0.374, 0.013, 0.774, -0.109, -0.271],
|
||||
]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats(
|
||||
[[[
|
||||
[-0.077, 0.360, -0.782, 0.072, 0.665],
|
||||
[-0.287, 1.621, -1.597, -0.052, 0.611],
|
||||
[0.760, -0.034, -0.345, 0.494, -0.078],
|
||||
[-1.805, -0.476, 0.205, 0.338, 1.353],
|
||||
[0.374, 0.013, 0.774, -0.109, -0.271],
|
||||
]]],
|
||||
&device,
|
||||
);
|
||||
let (output1, output2, output3) = model.forward(input.clone(), input.clone(), input);
|
||||
let expected1 = Data::from([[[[0.008, -0.131, -0.208, 0.425]]]]);
|
||||
let expected2 = Data::from([[[
|
||||
|
@ -413,10 +433,11 @@ mod tests {
|
|||
#[test]
|
||||
fn reshape() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: reshape::Model<Backend> = reshape::Model::new();
|
||||
let device = Default::default();
|
||||
let model: reshape::Model<Backend> = reshape::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 1>::from_floats([0., 1., 2., 3.]);
|
||||
let input = Tensor::<Backend, 1>::from_floats([0., 1., 2., 3.], &device);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[0., 1., 2., 3.]]);
|
||||
|
||||
|
@ -426,10 +447,11 @@ mod tests {
|
|||
#[test]
|
||||
fn flatten() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: flatten::Model<Backend> = flatten::Model::new();
|
||||
let device = Default::default();
|
||||
let model: flatten::Model<Backend> = flatten::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 3>::ones([1, 5, 15]);
|
||||
let input = Tensor::<Backend, 3>::ones([1, 5, 15], &device);
|
||||
let output = model.forward(input);
|
||||
|
||||
let expected_shape = Shape::from([1, 75]);
|
||||
|
@ -441,7 +463,7 @@ mod tests {
|
|||
let model: batch_norm::Model<Backend> = batch_norm::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input = Tensor::<Backend, 3>::ones([1, 20, 1]);
|
||||
let input = Tensor::<Backend, 3>::ones_devauto([1, 20, 1]);
|
||||
let output = model.forward(input);
|
||||
|
||||
let expected_shape = Shape::from([1, 5, 2, 2]);
|
||||
|
@ -455,13 +477,17 @@ mod tests {
|
|||
#[test]
|
||||
fn relu() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: relu::Model<Backend> = relu::Model::new();
|
||||
let device = Default::default();
|
||||
let model: relu::Model<Backend> = relu::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
]);
|
||||
let input = Tensor::<Backend, 2>::from_floats(
|
||||
[
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
|
@ -474,13 +500,17 @@ mod tests {
|
|||
#[test]
|
||||
fn sigmoid() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: sigmoid::Model<Backend> = sigmoid::Model::new();
|
||||
let device = Default::default();
|
||||
let model: sigmoid::Model<Backend> = sigmoid::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
]);
|
||||
let input = Tensor::<Backend, 2>::from_floats(
|
||||
[
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([
|
||||
[0.58338636, 0.532_157_9, 0.55834854],
|
||||
|
@ -493,13 +523,17 @@ mod tests {
|
|||
#[test]
|
||||
fn transpose() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: transpose::Model<Backend> = transpose::Model::new();
|
||||
let device = Default::default();
|
||||
let model: transpose::Model<Backend> = transpose::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 2>::from_floats([
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
]);
|
||||
let input = Tensor::<Backend, 2>::from_floats(
|
||||
[
|
||||
[0.33669037, 0.128_809_4, 0.23446237],
|
||||
[0.23033303, -1.122_856_4, -0.18632829],
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([
|
||||
[0.33669037, 0.23033303],
|
||||
|
@ -516,7 +550,7 @@ mod tests {
|
|||
let model: equal::Model<Backend> = equal::Model::default();
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 1., 1., 1.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats_devauto([[[[1., 1., 1., 1.]]]]);
|
||||
|
||||
let scalar = 2f64;
|
||||
let (tensor_out, scalar_out) = model.forward(input, scalar);
|
||||
|
@ -530,17 +564,21 @@ mod tests {
|
|||
#[test]
|
||||
fn clip_opset16() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: clip_opset16::Model<Backend> = clip_opset16::Model::new();
|
||||
let device = Default::default();
|
||||
let model: clip_opset16::Model<Backend> = clip_opset16::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 1>::from_floats([
|
||||
0.88226926,
|
||||
0.91500396,
|
||||
0.38286376,
|
||||
0.95930564,
|
||||
0.390_448_2,
|
||||
0.60089535,
|
||||
]);
|
||||
let input = Tensor::<Backend, 1>::from_floats(
|
||||
[
|
||||
0.88226926,
|
||||
0.91500396,
|
||||
0.38286376,
|
||||
0.95930564,
|
||||
0.390_448_2,
|
||||
0.60089535,
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let (output1, output2, output3) = model.forward(input);
|
||||
let expected1 = Data::from([
|
||||
0.88226926,
|
||||
|
@ -561,17 +599,21 @@ mod tests {
|
|||
#[test]
|
||||
fn clip_opset7() {
|
||||
// Initialize the model without weights (because the exported file does not contain them)
|
||||
let model: clip_opset7::Model<Backend> = clip_opset7::Model::new();
|
||||
let device = Default::default();
|
||||
let model: clip_opset7::Model<Backend> = clip_opset7::Model::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 1>::from_floats([
|
||||
0.88226926,
|
||||
0.91500396,
|
||||
0.38286376,
|
||||
0.95930564,
|
||||
0.390_448_2,
|
||||
0.60089535,
|
||||
]);
|
||||
let input = Tensor::<Backend, 1>::from_floats(
|
||||
[
|
||||
0.88226926,
|
||||
0.91500396,
|
||||
0.38286376,
|
||||
0.95930564,
|
||||
0.390_448_2,
|
||||
0.60089535,
|
||||
],
|
||||
&device,
|
||||
);
|
||||
let (output1, output2, output3) = model.forward(input);
|
||||
let expected1 = Data::from([
|
||||
0.88226926,
|
||||
|
@ -594,11 +636,11 @@ mod tests {
|
|||
// Initialize the model with weights (loaded from the exported file)
|
||||
let model: linear::Model<Backend> = linear::Model::default();
|
||||
#[allow(clippy::approx_constant)]
|
||||
let input1 = Tensor::<Backend, 2>::full([4, 3], 3.14);
|
||||
let input1 = Tensor::<Backend, 2>::full_devauto([4, 3], 3.14);
|
||||
#[allow(clippy::approx_constant)]
|
||||
let input2 = Tensor::<Backend, 2>::full([2, 5], 3.14);
|
||||
let input2 = Tensor::<Backend, 2>::full_devauto([2, 5], 3.14);
|
||||
#[allow(clippy::approx_constant)]
|
||||
let input3 = Tensor::<Backend, 3>::full([3, 2, 7], 3.14);
|
||||
let input3 = Tensor::<Backend, 3>::full_devauto([3, 2, 7], 3.14);
|
||||
|
||||
let (output1, output2, output3) = model.forward(input1, input2, input3);
|
||||
|
||||
|
@ -628,10 +670,11 @@ mod tests {
|
|||
#[test]
|
||||
fn tanh() {
|
||||
// Initialize the model
|
||||
let model = tanh::Model::<Backend>::new();
|
||||
let device = Default::default();
|
||||
let model = tanh::Model::<Backend>::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]], &device);
|
||||
let output = model.forward(input);
|
||||
// data from pyTorch
|
||||
let expected = Data::from([[[[0.7616, 0.9640, 0.9951, 0.9993]]]]);
|
||||
|
@ -641,10 +684,11 @@ mod tests {
|
|||
#[test]
|
||||
fn recip() {
|
||||
// Initialize the model
|
||||
let model = recip::Model::<Backend>::new();
|
||||
let device = Default::default();
|
||||
let model = recip::Model::<Backend>::new(&device);
|
||||
|
||||
// Run the model
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1., 2., 3., 4.]]]], &device);
|
||||
let output = model.forward(input);
|
||||
// data from pyTorch
|
||||
let expected = Data::from([[[[1.0000, 0.5000, 0.3333, 0.2500]]]]);
|
||||
|
@ -657,7 +701,7 @@ mod tests {
|
|||
let model: conv_transpose2d::Model<Backend> = conv_transpose2d::Model::default();
|
||||
|
||||
// Run the model with ones as input for easier testing
|
||||
let input = Tensor::<Backend, 4>::ones([2, 4, 10, 15]);
|
||||
let input = Tensor::<Backend, 4>::ones_devauto([2, 4, 10, 15]);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
@ -675,9 +719,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn cos() {
|
||||
let model: cos::Model<Backend> = cos::Model::new();
|
||||
let device = Default::default();
|
||||
let model: cos::Model<Backend> = cos::Model::new(&device);
|
||||
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]], &device);
|
||||
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[[[0.5403, -0.6536, -0.9111, 0.9912]]]]);
|
||||
|
@ -688,9 +733,10 @@ mod tests {
|
|||
#[test]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn exp() {
|
||||
let model: exp::Model<Backend> = exp::Model::new();
|
||||
let device = Default::default();
|
||||
let model: exp::Model<Backend> = exp::Model::new(&device);
|
||||
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[0.0000, 0.6931]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[0.0000, 0.6931]]]], &device);
|
||||
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[[[1., 2.]]]]);
|
||||
|
@ -700,9 +746,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn gelu() {
|
||||
let model: gelu::Model<Backend> = gelu::Model::new();
|
||||
let device = Default::default();
|
||||
let model: gelu::Model<Backend> = gelu::Model::new(&device);
|
||||
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]], &device);
|
||||
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[[[0.8413, 3.9999, 9.0000, 25.0000]]]]);
|
||||
|
@ -712,9 +759,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn log() {
|
||||
let model: log::Model<Backend> = log::Model::new();
|
||||
let device = Default::default();
|
||||
let model: log::Model<Backend> = log::Model::new(&device);
|
||||
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]], &device);
|
||||
|
||||
let output = model.forward(input);
|
||||
let expected = Data::from([[[[0.0000, 1.3863, 2.1972, 3.2189]]]]);
|
||||
|
@ -724,9 +772,26 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn neg() {
|
||||
let model: neg::Model<Backend> = neg::Model::new();
|
||||
let device = Default::default();
|
||||
let model: neg::Model<Backend> = neg::Model::new(&device);
|
||||
|
||||
let input1 = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input1 = Tensor::<Backend, 4>::from_floats([[[[1.0, 4.0, 9.0, 25.0]]]], &device);
|
||||
let input2 = 99f64;
|
||||
|
||||
let (output1, output2) = model.forward(input1, input2);
|
||||
let expected1 = Data::from([[[[-1.0, -4.0, -9.0, -25.0]]]]);
|
||||
let expected2 = -99f64;
|
||||
|
||||
output1.to_data().assert_approx_eq(&expected1, 4);
|
||||
|
||||
assert_eq!(output2, expected2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_model_creation_with_a_default_device() {
|
||||
let model: neg::Model<Backend> = neg::Model::new_devauto();
|
||||
|
||||
let input1 = Tensor::<Backend, 4>::from_floats_devauto([[[[1.0, 4.0, 9.0, 25.0]]]]);
|
||||
let input2 = 99f64;
|
||||
|
||||
let (output1, output2) = model.forward(input1, input2);
|
||||
|
|
|
@ -25,7 +25,7 @@ macro_rules! test_model {
|
|||
let model: $mod_name::Model<Backend> = $mod_name::Model::default();
|
||||
|
||||
// Run the model with pi as input for easier testing
|
||||
let input = Tensor::<Backend, 3>::full([6, 4, 10], consts::PI);
|
||||
let input = Tensor::<Backend, 3>::full_devauto([6, 4, 10], consts::PI);
|
||||
|
||||
let output = model.forward(input);
|
||||
|
||||
|
|
|
@ -448,8 +448,8 @@ impl<PS: PrecisionSettings> BurnGraph<PS> {
|
|||
.collect::<Vec<_>>();
|
||||
|
||||
quote! {
|
||||
#[allow(dead_code)]
|
||||
pub fn new() -> Self {
|
||||
#[allow(dead_code, unused_variables)]
|
||||
pub fn new(device: &B::Device) -> Self {
|
||||
#body
|
||||
|
||||
Self {
|
||||
|
@ -457,6 +457,11 @@ impl<PS: PrecisionSettings> BurnGraph<PS> {
|
|||
phantom: core::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_devauto() -> Self {
|
||||
let device = B::Device::default();
|
||||
Self::new(&device)
|
||||
}
|
||||
}
|
||||
}
|
||||
fn codegen_new_record(&self) -> TokenStream {
|
||||
|
|
|
@ -81,19 +81,19 @@ macro_rules! batch_norm_serialize {
|
|||
BatchNormRecord {
|
||||
gamma: Param::new(
|
||||
ParamId::new(),
|
||||
Tensor::from_data($self.gamma.clone().convert()),
|
||||
Tensor::from_data_devauto($self.gamma.clone().convert()),
|
||||
),
|
||||
beta: Param::new(
|
||||
ParamId::new(),
|
||||
Tensor::from_data($self.beta.clone().convert()),
|
||||
Tensor::from_data_devauto($self.beta.clone().convert()),
|
||||
),
|
||||
running_mean: Param::new(
|
||||
ParamId::new(),
|
||||
Tensor::from_data($self.running_mean.clone().convert()),
|
||||
Tensor::from_data_devauto($self.running_mean.clone().convert()),
|
||||
),
|
||||
running_var: Param::new(
|
||||
ParamId::new(),
|
||||
Tensor::from_data($self.running_var.clone().convert()),
|
||||
Tensor::from_data_devauto($self.running_var.clone().convert()),
|
||||
),
|
||||
epsilon: ConstantRecord::new(),
|
||||
momentum: ConstantRecord::new(),
|
||||
|
@ -123,7 +123,7 @@ impl<PS: PrecisionSettings> NodeCodegen<PS> for BatchNormNode<PS> {
|
|||
init_with(record.#name);
|
||||
},
|
||||
false => quote! {
|
||||
init();
|
||||
init(device);
|
||||
},
|
||||
};
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue