Feature/codegen gather indices greater than rank 1 (#2199)

* implemented muli-dim index for GatherNode The `NodeCodegen` impl for `GatherNode` now performs gather in complete accordance with the ONNX Gather spec. - a `gather` function was added to the gather.rs file - `gather()` is now called within the codegen instead of `tensor.select()` - a test with two test cases have been added - test axes 0 and 1 - both use 2D index tensors * add gather_onnx to numeric api Added int and float implementations of gather to the burn-tensor numeric api: - named the methods `gather_onnx` to not be confused with the current `gather` - these implementations follow the `Gather` ONNX spec Updated the gather*.py variants and their onnx outputs * modified files didn't end up in last commit * tests passing for onnx gather The implementation of gather for the ONNX `Gather` spec is tentatively complete: - py test models are updated - onnx_tests are modified and passing: `gather`, `gather_scalar`, and `gather_shape` - node/gather tests are passing NOTE: The two additional tests in crates/burn-import/src/burn/node/gather.rs that test the actual functionality of gather are likely to be deleted, since they are redundant to the tests in crates/burn-import/onnx-tests/tests/onnx_tests.rs. * inlined onnx gather within codegen * rm gather_onnx from public api; rm unnecessary tests * add comments to gather py models * some codegen changes; formatting to appease run-checks - Some necessary changes and improvements to the codegen inlined code after translating from public api (removed in previous commit). - Changed some formatting that run-checks complained about. * simplify gather codegen; include 1d and 2d onnx tests Modified the `Gather` codegen per requested changes: - combined match statements on index - remove use of `alloc::vec::Vec` - use map -> collect instead of procedural - include a 1d index gather onnx test - remove superflous tests * delete unused gather.onnx
2024-08-28 04:51:19 -07:00 · 2024-08-28 04:51:19 -07:00 · 0292967000
parent 795201dcfc
commit 0292967000
14 changed files with 318 additions and 119 deletions
--- a/crates/burn-import/onnx-tests/build.rs
+++ b/crates/burn-import/onnx-tests/build.rs
@ -32,7 +32,8 @@ fn main() {
        .input("tests/exp/exp.onnx")
        .input("tests/expand/expand.onnx")
        .input("tests/flatten/flatten.onnx")
-        .input("tests/gather/gather.onnx")
+        .input("tests/gather/gather_1d_idx.onnx")
+        .input("tests/gather/gather_2d_idx.onnx")
        .input("tests/gather/gather_scalar.onnx")
        .input("tests/gather/gather_shape.onnx")
        .input("tests/gather_elements/gather_elements.onnx")
--- a/crates/burn-import/onnx-tests/tests/gather/gather.onnx
+++ b/crates/burn-import/onnx-tests/tests/gather/gather.onnx
@ -1,18 +0,0 @@
-pytorch2.1.1:¤
-A
-onnx::Gather_0
-onnx::Gather_12/Gather"Gather*
-axis 
-main_graphZ 
-onnx::Gather_0
-
-
-Z
-onnx::Gather_1
-
-
-b
-2
-
-
-B
--- a/crates/burn-import/onnx-tests/tests/gather/gather.py
+++ b/crates/burn-import/onnx-tests/tests/gather/gather.py
@ -1,47 +0,0 @@
-#!/usr/bin/env python3
-
-# used to generate model: onnx-tests/tests/gather/gather.onnx
-
-import torch
-import torch.nn as nn
-
-
-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
-
-    def forward(self, x, index):
-        gathered = torch.index_select(x, 1, index)
-        return gathered
-
-
-def main():
-    # Set random seed for reproducibility
-    torch.manual_seed(0)
-
-    # Export to onnx
-    model = Model()
-    model.eval()
-    device = torch.device("cpu")
-    onnx_name = "gather.onnx"
-
-    dummy_input = torch.randn(2, 3, device=device)
-    dummy_index = torch.tensor([0, 2], device=device, dtype=torch.int64)
-
-    torch.onnx.export(model, (dummy_input, dummy_index), onnx_name,
-                      verbose=False, opset_version=16)
-
-    print("Finished exporting model to {}".format(onnx_name))
-
-    # Output some test data for use in the test
-    test_input = torch.tensor([[1.0, 2.0, 3.0],
-                               [4.0, 5.0, 6.0]])
-    test_index = torch.tensor([0, 2], dtype=torch.int64)
-
-    print("Test input data: {}, {}".format(test_input, test_index))
-    output = model.forward(test_input, test_index)
-    print("Test output data: {}".format(output))
-
-
-if __name__ == '__main__':
-    main()
--- a/crates/burn-import/onnx-tests/tests/gather/gather_1d_idx.onnx
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_1d_idx.onnx
--- a/crates/burn-import/onnx-tests/tests/gather/gather_1d_idx.py
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_1d_idx.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+# used to generate model: onnx-tests/tests/gather/gather.onnx
+
+# There is no current support for `Split`, and the `for` loop over the indices
+# results in a `Split` node in the ONNX model.
+# Therefore, this model is built and exported using ONNX directly.
+
+import onnx
+
+
+def build_model():
+    return onnx.helper.make_model(
+        ir_version=8,
+        opset_imports=[onnx.helper.make_operatorsetid("", 16)],
+        graph=onnx.helper.make_graph(name="main_graph", nodes=[
+            onnx.helper.make_node(
+                "Gather",
+                inputs=["input1", "input2"],
+                outputs=["output1"],
+                name="/Gather",
+                axis=1
+            ),
+        ],
+        inputs=[
+            onnx.helper.make_value_info(
+                name="input1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 3]
+                ),
+            ),
+            onnx.helper.make_value_info(
+                name="input2",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.INT64, shape=[2]
+                ),
+            ),
+
+        ],
+        outputs=[
+            onnx.helper.make_value_info(
+                name="output1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 2]
+                ),
+            )
+        ]),
+    )
+
+
+def main():
+    onnx_model = build_model()
+    file_name = "gather_1d_idx.onnx"
+
+    # Ensure valid ONNX:
+    onnx.checker.check_model(onnx_model)
+
+    onnx.save(onnx_model, file_name)
+
+
+if __name__ == '__main__':
+    main()
--- a/crates/burn-import/onnx-tests/tests/gather/gather_2d_idx.onnx
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_2d_idx.onnx
--- a/crates/burn-import/onnx-tests/tests/gather/gather_2d_idx.py
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_2d_idx.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+# used to generate model: onnx-tests/tests/gather/gather.onnx
+
+# There is no current support for `Split`, and the `for` loop over the indices
+# results in a `Split` node in the ONNX model.
+# Therefore, this model is built and exported using ONNX directly.
+
+import onnx
+
+
+def build_model():
+    return onnx.helper.make_model(
+        ir_version=8,
+        opset_imports=[onnx.helper.make_operatorsetid("", 16)],
+        graph=onnx.helper.make_graph(name="main_graph", nodes=[
+            onnx.helper.make_node(
+                "Gather",
+                inputs=["input1", "input2"],
+                outputs=["output1"],
+                name="/Gather",
+                axis=0
+            ),
+        ],
+        inputs=[
+            onnx.helper.make_value_info(
+                name="input1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 3]
+                ),
+            ),
+            onnx.helper.make_value_info(
+                name="input2",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.INT64, shape=[2, 2]
+                ),
+            ),
+
+        ],
+        outputs=[
+            onnx.helper.make_value_info(
+                name="output1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 2, 2]
+                ),
+            )
+        ]),
+    )
+
+
+def main():
+    onnx_model = build_model()
+    file_name = "gather_2d_idx.onnx"
+
+    # Ensure valid ONNX:
+    onnx.checker.check_model(onnx_model)
+
+    onnx.save(onnx_model, file_name)
+
+
+if __name__ == '__main__':
+    main()
--- a/crates/burn-import/onnx-tests/tests/gather/gather_scalar.onnx
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_scalar.onnx
--- a/crates/burn-import/onnx-tests/tests/gather/gather_scalar.py
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_scalar.py
@ -2,45 +2,60 @@

 # used to generate model: onnx-tests/tests/gather/gather_scalar.onnx

-import torch
-import torch.nn as nn
+# There is no current support for `Split`, and the `for` loop over the indices
+# results in a `Split` node in the ONNX model.
+# Therefore, this model is built and exported using ONNX directly.
+
+import onnx


-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
+def build_model():
+    return onnx.helper.make_model(
+        ir_version=8,
+        opset_imports=[onnx.helper.make_operatorsetid("", 16)],
+        graph=onnx.helper.make_graph(name="main_graph", nodes=[
+            onnx.helper.make_node(
+                "Gather",
+                inputs=["input1", "input2"],
+                outputs=["output1"],
+                name="/Gather",
+                axis=0
+            ),
+        ],
+        inputs=[
+            onnx.helper.make_value_info(
+                name="input1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 3]
+                ),
+            ),
+            onnx.helper.make_value_info(
+                name="input2",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.INT64, shape=[]
+                ),
+            ),

-    def forward(self, x, index):
-        gathered = torch.select(x, 0, index)
-        return gathered
+        ],
+        outputs=[
+            onnx.helper.make_value_info(
+                name="output1",
+                type_proto=onnx.helper.make_tensor_type_proto(
+                    elem_type=onnx.TensorProto.FLOAT, shape=[3]
+                ),
+            )
+        ]),
+    )


 def main():
-    # Set random seed for reproducibility
-    torch.manual_seed(0)
+    onnx_model = build_model()
+    file_name = "gather_scalar.onnx"

-    # Export to onnx
-    model = Model()
-    model.eval()
-    device = torch.device("cpu")
-    onnx_name = "gather_scalar.onnx"
+    # Ensure valid ONNX:
+    onnx.checker.check_model(onnx_model)

-    dummy_input = torch.randn(2, 3, device=device)
-    dummy_index = 0
-
-    torch.onnx.export(model, (dummy_input, dummy_index), onnx_name,
-                      verbose=False, opset_version=16)
-
-    print("Finished exporting model to {}".format(onnx_name))
-
-    # Output some test data for use in the test
-    test_input = torch.tensor([[1.0, 2.0, 3.0],
-                               [4.0, 5.0, 6.0]])
-    test_index = 0
-
-    print("Test input data: {}, {}".format(test_input, test_index))
-    output = model.forward(test_input, test_index)
-    print("Test output data: {}".format(output))
+    onnx.save(onnx_model, file_name)


 if __name__ == '__main__':
--- a/crates/burn-import/onnx-tests/tests/gather/gather_shape.py
+++ b/crates/burn-import/onnx-tests/tests/gather/gather_shape.py
@ -33,7 +33,7 @@ def build_model():
            onnx.helper.make_value_info(
                name="input1",
                type_proto=onnx.helper.make_tensor_type_proto(
-                    elem_type=onnx.TensorProto.FLOAT, shape=[2,3]
+                    elem_type=onnx.TensorProto.FLOAT, shape=[2, 3]
                ),
            ),
            onnx.helper.make_value_info(
@ -66,4 +66,4 @@ def main():


 if __name__ == "__main__":
-    main()
+    main()
--- a/crates/burn-import/onnx-tests/tests/onnx_tests.rs
+++ b/crates/burn-import/onnx-tests/tests/onnx_tests.rs
@ -41,7 +41,8 @@ include_models!(
    exp,
    expand,
    flatten,
-    gather,
+    gather_1d_idx,
+    gather_2d_idx,
    gather_scalar,
    gather_shape,
    gather_elements,
@ -451,15 +452,29 @@ mod tests {
    }

    #[test]
-    fn gather() {
-        let model: gather::Model<Backend> = gather::Model::default();
+    fn gather_1d_idx() {
+        let model: gather_1d_idx::Model<Backend> = gather_1d_idx::Model::default();

        let device = Default::default();

        let input = Tensor::<Backend, 2>::from_floats([[1., 2., 3.], [4., 5., 6.]], &device);
        let index = Tensor::<Backend, 1, Int>::from_ints([0, 2], &device);
-        let output = model.forward(input, index);
        let expected = TensorData::from([[1f32, 3.], [4., 6.]]);
+        let output = model.forward(input, index);
+
+        assert_eq!(output.to_data(), expected);
+    }
+
+    #[test]
+    fn gather_2d_idx() {
+        let model: gather_2d_idx::Model<Backend> = gather_2d_idx::Model::default();
+
+        let device = Default::default();
+
+        let input = Tensor::<Backend, 2>::from_data([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], &device);
+        let index = Tensor::<Backend, 2, Int>::from_data([[0, 1], [1, 2]], &device);
+        let expected = TensorData::from([[[1f32, 1.2], [2.3, 3.4]], [[2.3, 3.4], [4.5, 5.7]]]);
+        let output = model.forward(input, index);

        assert_eq!(output.to_data(), expected);
    }
--- a/crates/burn-import/src/burn/node/gather.rs
+++ b/crates/burn-import/src/burn/node/gather.rs
@ -27,6 +27,12 @@ impl<PS: PrecisionSettings> NodeCodegen<PS> for GatherNode {
        node_position: usize,
    ) -> proc_macro2::TokenStream {
        let dim = self.dim.to_tokens();
+        let input_rank = match &self.input {
+            Type::Tensor(in_tensor) => in_tensor.dim,
+            Type::Shape(_) => 1,
+            _ => panic!("Gather needs Tensor or Shape input, got {:?}!", self.input),
+        };
+
        let input = match &self.input {
            Type::Tensor(in_tensor) => scope.tensor_use_owned(in_tensor, node_position),
            Type::Shape(in_shape) => {
@ -34,10 +40,11 @@ impl<PS: PrecisionSettings> NodeCodegen<PS> for GatherNode {
                // To copy just the values from the shape value without moving it
                // (which could lead to ownership problems if the same Shape is used multiple times)
                // borrow the array as a slice and use that to create the Tensor:
-                quote! { Tensor::from_data(&#in_shape_name as &[_], &*self.device) }
+                quote! { Tensor::<B, 1, Int>::from_data(&#in_shape_name as &[_], &*self.device) }
            }
            _ => panic!("Gather needs Scalar or Shape input, got {:?}!", self.input),
        };
+
        let output = &self.output.name;

        match &self.index {
@ -46,14 +53,41 @@ impl<PS: PrecisionSettings> NodeCodegen<PS> for GatherNode {
                // convert the 0-D index to a 1-D Tensor with len 1 to use burn's select,
                // then squeeze the dimension to reduce the rank
                let index = &idx_scalar.name;
+                let output_rank = input_rank - 1;
                quote! {
-                    let #output = #input.select(#dim, Tensor::from_data([#index], &*self.device)).squeeze(#dim);
+                    let indices = Tensor::<B, 1, _>::from_data([#index], &*self.device);
+                    let slice = Tensor::select(#input, #dim, indices);
+                    let #output = slice.squeeze::<#output_rank>(#dim);
                }
            }
            Type::Tensor(idx_tensor) => {
                let index = scope.tensor_use_owned(idx_tensor, node_position);
-                quote! {
-                    let #output = #input.select(#dim, #index);
+                let index_rank = idx_tensor.dim;
+                let output_rank = index_rank + input_rank - 1;
+                match index_rank {
+                    1 => quote! {
+                        let indices = #index;
+                        let #output = Tensor::select(#input, #dim, indices);
+                    },
+                    _ => quote! {
+                        let indices = #index;
+
+                        let n_dims = indices.dims().len();
+                        let index_flat = match n_dims {
+                            1 => indices.reshape([1, -1]),
+                            n if n >= 2 => indices.flatten::<2>(0, n - 2),
+                            _ => panic!("Number of dimensions must be greater than 0"),
+                        };
+
+                        let out = index_flat
+                            .iter_dim(0)
+                            .map(|idxs| {
+                                let idxs = idxs.squeeze::<1>(0);
+                                Tensor::select(#input.clone(), #dim, idxs)
+                            })
+                            .collect();
+                        let #output = Tensor::stack::<#output_rank>(out, #dim);
+                    },
                }
            }
            _ => panic!("Gather needs Scalar or Tensor index, got {:?}!", self.index),
@ -78,7 +112,7 @@ mod tests {
    };

    #[test]
-    fn test_codegen_gather() {
+    fn test_codegen_gather_1d_idx() {
        let mut graph = BurnGraph::<FullPrecisionSettings>::default();

        graph.register(GatherNode::new(
@ -121,8 +155,77 @@ mod tests {
                    tensor1: Tensor<B, 2>,
                    tensor2: Tensor<B, 1, Int>
                ) -> Tensor<B, 2> {
-                    let tensor3 = tensor1.select(0, tensor2);
+                    let indices = tensor2;
+                    let tensor3 = Tensor::select(tensor1, 0, indices);
+                    tensor3
+                }
+            }
+        };

+        assert_tokens(graph.codegen(), expected);
+    }
+
+    #[test]
+    fn test_codegen_gather_2d_idx() {
+        let mut graph = BurnGraph::<FullPrecisionSettings>::default();
+
+        graph.register(GatherNode::new(
+            Type::Tensor(TensorType::new_float("tensor1", 2)),
+            Type::Tensor(TensorType::new_int("tensor2", 2)),
+            TensorType::new_float("tensor3", 3),
+            0,
+        ));
+
+        graph.register_input_output(
+            vec!["tensor1".to_string(), "tensor2".to_string()],
+            vec!["tensor3".to_string()],
+        );
+
+        let expected = quote! {
+            use burn::tensor::Int;
+            use burn::{
+                module::Module,
+                tensor::{backend::Backend, Tensor},
+            };
+
+            #[derive(Module, Debug)]
+            pub struct Model<B: Backend> {
+                phantom: core::marker::PhantomData<B>,
+                device: burn::module::Ignored<B::Device>,
+            }
+
+            impl<B: Backend> Model <B> {
+                #[allow(unused_variables)]
+                pub fn new(device: &B::Device) -> Self {
+                    Self {
+                        phantom: core::marker::PhantomData,
+                        device: burn::module::Ignored(device.clone()),
+                    }
+                }
+
+                #[allow(clippy::let_and_return, clippy::approx_constant)]
+                pub fn forward(
+                    &self,
+                    tensor1: Tensor<B, 2>,
+                    tensor2: Tensor<B, 2, Int>
+                ) -> Tensor<B, 3> {
+                    let indices = tensor2;
+
+                    let n_dims = indices.dims().len();
+                    let index_flat = match n_dims {
+                        1 => indices.reshape([1, -1]),
+                        n if n >= 2 => indices.flatten::<2>(0, n - 2),
+                        _ => panic!("Number of dimensions must be greater than 0"),
+                    };
+
+                    let out = index_flat
+                        .iter_dim(0)
+                        .map(|idxs| {
+                            let idxs = idxs.squeeze::<1>(0);
+                            Tensor::select(tensor1.clone(), 0, idxs)
+                        })
+                        .collect();
+                    let tensor3 = Tensor::stack::<3usize>(out, 0);
                    tensor3
                }
            }
@ -138,7 +241,7 @@ mod tests {
        graph.register(GatherNode::new(
            Type::Shape(ShapeType::new("shape1", 3)),
            Type::Tensor(TensorType::new_int("tensor1", 1)),
-            TensorType::new_float("tensor2", 2),
+            TensorType::new_int("tensor2", 1),
            0,
        ));

@ -174,8 +277,14 @@ mod tests {
                    &self,
                    shape1: [usize; 3],
                    tensor1: Tensor<B, 1, Int>
-                ) -> Tensor<B, 2> {
-                    let tensor2 = Tensor::from_data(&shape1 as &[_], &*self.device).select(0, tensor1);
+                ) -> Tensor<B, 1, Int> {
+                    let indices = tensor1;
+
+                    let tensor2 = Tensor::select(
+                        Tensor::<B, 1, Int>::from_data(&shape1 as &[_], &*self.device),
+                        0,
+                        indices,
+                    );

                    tensor2
                }
@ -192,7 +301,7 @@ mod tests {
        graph.register(GatherNode::new(
            Type::Tensor(TensorType::new_float("tensor1", 2)),
            Type::Scalar(ScalarType::new("scalar1", ScalarKind::Int64)),
-            TensorType::new_float("tensor2", 2),
+            TensorType::new_float("tensor2", 1),
            0,
        ));

@ -227,8 +336,11 @@ mod tests {
                    &self,
                    tensor1: Tensor<B, 2>,
                    scalar1: i64
-                ) -> Tensor<B, 2> {
-                    let tensor2 = tensor1.select(0, Tensor::from_data([scalar1], &*self.device)).squeeze(0);
+                ) -> Tensor<B, 1> {
+                    let indices = Tensor::<B, 1, _>::from_data([scalar1], &*self.device);
+
+                    let slice = Tensor::select(tensor1, 0, indices);
+                    let tensor2 = slice.squeeze::<1usize>(0);

                    tensor2
                }
--- a/crates/burn-tensor/src/tensor/api/check.rs
+++ b/crates/burn-tensor/src/tensor/api/check.rs
@ -892,6 +892,7 @@ impl TensorCheck {

        check
    }
+
    pub(crate) fn check_prelu_shape<const D: usize>(
        shape_tensor: &Shape<D>,
        shape_weight: &Shape<1>,
--- a/crates/onnx-ir/src/dim_inference.rs
+++ b/crates/onnx-ir/src/dim_inference.rs
@ -816,10 +816,6 @@ fn gather_update_outputs(node: &mut Node) {
        _ => panic!("Only tensor indices is valid, got {:?}", node.inputs[1].ty),
    };

-    if indices_dim > 1 {
-        panic!("Gather: indices tensor rank above 1 not supported")
-    }
-
    match &node.inputs[0].ty {
        ArgType::Tensor(input_tensor) => {
            // Output of rank q+(r-1), where q is rank of indices tensor and r is rank of input