You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/02/19 06:27:17 UTC

[GitHub] [tvm] codeislife99 opened a new pull request #7477: SparseReshape Op

codeislife99 opened a new pull request #7477:
URL: https://github.com/apache/tvm/pull/7477


   Thanks for contributing to TVM!   Please refer to guideline https://tvm.apache.org/docs/contribute/ for useful information and tips. After the pull request is submitted, please request code reviews from [Reviewers](https://github.com/apache/incubator-tvm/blob/master/CONTRIBUTORS.md#reviewers) by @ them in the pull request thread.
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582413082



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r580687388



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       It seems that I cannot run separate kernels inside if and else, hence I put the kernel at the top of the if and else scope . @mbrookhart Can you share a small pseudocode snippet of how I should go about what you are suggesting 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] comaniac merged pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

comaniac merged pull request #7477:
URL: https://github.com/apache/tvm/pull/7477


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582393362



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+
+        with ib.new_scope():
+            nthread_tx = max_threads
+            nthread_bx = ceil_div(sparse_indices_ptr.shape[0], max_threads)
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            row_number = bx * max_threads + tx
+
+            # Return same inputs if shapes are equal
+            with ib.if_scope(equal_shape[0]):
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = sparse_indices[row_number, j]
+
+            # Else compute new_sparse_indices
+            with ib.else_scope():
+                dividers[new_shape_size - 1] = Cast("int64", 1)
+                with ib.for_range(0, new_shape_size - 1) as i_:
+                    i = i_ + 1
+                    dividers[new_shape_size - 1 - i] = (
+                        dividers[new_shape_size - i] * out_new_shape[new_shape_size - i]
+                    )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    flattened_indices[row_number] = Cast("int64", 0)
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        flattened_indices[row_number] += (
+                            sparse_indices[row_number, j] * multipliers[j]
+                        )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    current_element = ib.allocate(
+                        "int64", (1,), name="current_element", scope="local"
+                    )
+                    current_element[0] = flattened_indices[row_number]
+
+                    with ib.for_range(0, new_sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = Cast(
+                            "int64", floordiv(current_element[0], dividers[j])
+                        )
+                        current_element[0] = floormod(current_element[0], dividers[j])
+
+        return ib.get()
+
+    new_sparse_indices_buf = decl_buffer(
+        new_sparse_indices_shape, "int64", "new_sparse_indices_buf"
+    )
+    new_shape_buf = decl_buffer(new_shape_shape, "int64", "new_shape_buf")
+
+    return extern(
+        [new_sparse_indices_shape, new_shape_shape],
+        [sparse_indices, prev_shape, new_shape],
+        lambda ins, outs: gen_ir(ins[0], ins[1], ins[2], outs[0], outs[1]),
+        dtype="int64",

Review comment:
       Done. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r583229740



##########
File path: python/tvm/topi/sparse_reshape.py
##########
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+from ..tir import decl_buffer, ir_builder, Cast
+from ..te import extern, div, floordiv, floormod
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(

Review comment:
       1. Yeah , you are right, I removed a few cases which I thought were redundant. 
   2. Parallelized the CPU loops . Thanks :) 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582412000



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(
+    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, dtype, use_dyn
+):
+    def ref_sparse_reshape(

Review comment:
       I already have the tests with the TF Frontend code. They should be known good outputs. And I have copied over the same tests here ? Do you think that satisfies the correct of the numpy code ? 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] mbrookhart commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

mbrookhart commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r583167241



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(
+    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, dtype, use_dyn
+):
+    def ref_sparse_reshape(

Review comment:
       Probably not worth it to duplicate the tests between TF and topi? Maybe run a subset?

##########
File path: python/tvm/topi/sparse_reshape.py
##########
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+from ..tir import decl_buffer, ir_builder, Cast
+from ..te import extern, div, floordiv, floormod
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(

Review comment:
       I'm wondering if it would help to parallelize some of the for loops on CPU like we do on GPU? On CPU you can use the parallel tag in ir_builder.for_range. Not sure if this is enough of a performance improvement to justify it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#issuecomment-786303375


   @tkonolige All your comments have been resolved. Any other changes you'd like me to make  ? 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] tkonolige commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

tkonolige commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r580612206



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       You need to make sure all the memory has been written to.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582393581



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       Thanks, @mbrookhart :) I am therefore marking this as resolved. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] mbrookhart commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

mbrookhart commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582327240



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       When I read your code more closely, you have one scope above, and another scope below. That will end the first cuda kernel, return to host, and then launch a new one. That effectively does a global sync, I think this is fine.
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] tkonolige commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

tkonolige commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582400531



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(

Review comment:
       This is a lot of tests. It takes about a minute on my machine (which is decently beefy). Can you not parameterize dtype and instead vary the dtype in the input arrays?

##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(
+    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, dtype, use_dyn
+):
+    def ref_sparse_reshape(

Review comment:
       This is a lot of code and I'm not sure it is correct. Could you add a test where you compare to a known-good output?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582381895



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582411582



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(

Review comment:
       Sure. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#issuecomment-782474714


   @tkonolige @mbrookhart PTAL. 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582411532



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+
+        with ib.new_scope():
+            nthread_tx = max_threads
+            nthread_bx = ceil_div(sparse_indices_ptr.shape[0], max_threads)
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            row_number = bx * max_threads + tx
+
+            # Return same inputs if shapes are equal
+            with ib.if_scope(equal_shape[0]):
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = sparse_indices[row_number, j]
+
+            # Else compute new_sparse_indices
+            with ib.else_scope():
+                dividers[new_shape_size - 1] = Cast("int64", 1)
+                with ib.for_range(0, new_shape_size - 1) as i_:
+                    i = i_ + 1
+                    dividers[new_shape_size - 1 - i] = (
+                        dividers[new_shape_size - i] * out_new_shape[new_shape_size - i]
+                    )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    flattened_indices[row_number] = Cast("int64", 0)
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        flattened_indices[row_number] += (
+                            sparse_indices[row_number, j] * multipliers[j]
+                        )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    current_element = ib.allocate(
+                        "int64", (1,), name="current_element", scope="local"
+                    )
+                    current_element[0] = flattened_indices[row_number]
+
+                    with ib.for_range(0, new_sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = Cast(
+                            "int64", floordiv(current_element[0], dividers[j])
+                        )
+                        current_element[0] = floormod(current_element[0], dividers[j])
+
+        return ib.get()
+
+    new_sparse_indices_buf = decl_buffer(
+        new_sparse_indices_shape, "int64", "new_sparse_indices_buf"
+    )
+    new_shape_buf = decl_buffer(new_shape_shape, "int64", "new_shape_buf")
+
+    return extern(
+        [new_sparse_indices_shape, new_shape_shape],
+        [sparse_indices, prev_shape, new_shape],
+        lambda ins, outs: gen_ir(ins[0], ins[1], ins[2], outs[0], outs[1]),
+        dtype="int64",
+        out_buffers=[new_sparse_indices_buf, new_shape_buf],
+        name="sparse_reshape_cuda",
+        tag="sparse_reshape_cuda",
+    )
+
+
+def _default_schedule(outs):
+    """Default schedule for gpu."""
+    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+    s = te.create_schedule([x.op for x in outs])
+    scheduled_ops = []
+
+    def traverse(op):

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] mbrookhart commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

mbrookhart commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r580685169



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       There is no global sync, you need to break the cuda kernel, go back to host, and then launch a new cuda kernel to get a global sync




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r580611119



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       Any reason all threads must be at the same spot here ? It can either go only in if scope or else scope ?  




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] comaniac commented on pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

comaniac commented on pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#issuecomment-786787430


   Thanks @codeislife99 @mbrookhart @tkonolige 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r583229957



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,232 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6], dtype=np.int64),
+            np.array([9, -1], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [[0, 0, 0, 0], [0, 0, 1, 2], [0, 1, 0, 3], [1, 0, 0, 4], [1, 2, 3, 6]],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array(
+                [
+                    [0, 0, 0, 0, 0],
+                    [0, 0, 1, 2, 3],
+                    [0, 1, 0, 3, 5],
+                    [1, 0, 0, 4, 6],
+                    [1, 2, 3, 6, 8],
+                ],
+                dtype=np.int64,
+            ),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([2, 3, 6, 7, 9], dtype=np.int64),
+            np.array([9, -1, 7], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([2, -1, 6], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 0], [0, 1], [3, 4], [4, 3], [7, 3]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([9, 4], dtype=np.int64),
+            np.array([-1], dtype=np.int64),
+        ),
+        (
+            np.array([[0], [5], [10], [20], [24]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([25], dtype=np.int64),
+            np.array([5, 5], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([500, -1], dtype=np.int64),
+        ),
+        (
+            np.array([[0, 100], [200, 100], [300, 400], [50, 20], [400, 50]], dtype=np.int64),
+            np.array([7, 5, 6, 3, 9], dtype=np.int64),
+            np.array([500, 20], dtype=np.int64),
+            np.array([250, 40], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, -1], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 1), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([4], dtype=np.int64),
+            np.array([2, 2], dtype=np.int64),
+        ),
+        (
+            np.ones((0, 2), dtype=np.int64),
+            np.array([], dtype=np.int64),
+            np.array([3, 6], dtype=np.int64),
+            np.array([-1, 2], dtype=np.int64),
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("use_dyn", [True, False])
+def test_sparse_reshape(
+    sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np, dtype, use_dyn
+):
+    def ref_sparse_reshape(

Review comment:
       Done. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] codeislife99 commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

codeislife99 commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r582393650



##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,217 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [tvm] tkonolige commented on a change in pull request #7477: SparseReshape Op

Posted by GitBox <gi...@apache.org>.

tkonolige commented on a change in pull request #7477:
URL: https://github.com/apache/tvm/pull/7477#discussion_r580430091



##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)

Review comment:
       These should all probably have `scope="global"` given that you are using them across threads and warps.

##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+

Review comment:
       You probably want a global barrier here to make sure all threads are at the same spot. `ib.emit(tvm.tir.Call(None, "tir.tvm_storage_sync", tvm.runtime.convert(["shared"])))` does it for shared, not sure how to do global.

##########
File path: tests/python/relay/test_op_level3.py
##########
@@ -1311,6 +1311,217 @@ def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_
     # verify_sparse_to_dense([[[[0, 1, 4], [0, 2, 4]]]], [[[[3.1, 3.1, 3.1]]]], 3.5, [5], [3.1, 3.1, 3.5, 3.5, 3.1])
 
 
+@tvm.testing.uses_gpu
+@pytest.mark.parametrize(
+    "sparse_indices_np, sparse_values_np, prev_shape_np, new_shape_np",
+    [
+        (
+            np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 2, 3]], dtype=np.int64),

Review comment:
       Can you test with int32 on one of these.

##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+
+        with ib.new_scope():
+            nthread_tx = max_threads
+            nthread_bx = ceil_div(sparse_indices_ptr.shape[0], max_threads)
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            row_number = bx * max_threads + tx
+
+            # Return same inputs if shapes are equal
+            with ib.if_scope(equal_shape[0]):
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = sparse_indices[row_number, j]
+
+            # Else compute new_sparse_indices
+            with ib.else_scope():
+                dividers[new_shape_size - 1] = Cast("int64", 1)
+                with ib.for_range(0, new_shape_size - 1) as i_:
+                    i = i_ + 1
+                    dividers[new_shape_size - 1 - i] = (
+                        dividers[new_shape_size - i] * out_new_shape[new_shape_size - i]
+                    )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    flattened_indices[row_number] = Cast("int64", 0)
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        flattened_indices[row_number] += (
+                            sparse_indices[row_number, j] * multipliers[j]
+                        )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    current_element = ib.allocate(
+                        "int64", (1,), name="current_element", scope="local"
+                    )
+                    current_element[0] = flattened_indices[row_number]
+
+                    with ib.for_range(0, new_sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = Cast(
+                            "int64", floordiv(current_element[0], dividers[j])
+                        )
+                        current_element[0] = floormod(current_element[0], dividers[j])
+
+        return ib.get()
+
+    new_sparse_indices_buf = decl_buffer(
+        new_sparse_indices_shape, "int64", "new_sparse_indices_buf"
+    )
+    new_shape_buf = decl_buffer(new_shape_shape, "int64", "new_shape_buf")
+
+    return extern(
+        [new_sparse_indices_shape, new_shape_shape],
+        [sparse_indices, prev_shape, new_shape],
+        lambda ins, outs: gen_ir(ins[0], ins[1], ins[2], outs[0], outs[1]),
+        dtype="int64",
+        out_buffers=[new_sparse_indices_buf, new_shape_buf],
+        name="sparse_reshape_cuda",
+        tag="sparse_reshape_cuda",
+    )
+
+
+def _default_schedule(outs):
+    """Default schedule for gpu."""
+    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+    s = te.create_schedule([x.op for x in outs])
+    scheduled_ops = []
+
+    def traverse(op):

Review comment:
       Can you use `traverse_inline` instead?

##########
File path: python/tvm/topi/cuda/sparse_reshape.py
##########
@@ -0,0 +1,232 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, too-many-arguments, too-many-nested-blocks
+"""Sparse_Reshape operator"""
+import tvm
+from tvm import te
+from ...tir import decl_buffer, ir_builder, Cast
+from ...te import extern, div, floordiv, floormod
+from ..utils import ceil_div
+
+
+def sparse_reshape(
+    sparse_indices,
+    prev_shape,
+    new_shape,
+    new_sparse_indices_shape,
+    new_shape_shape,
+):
+    """
+    Reshape a Sparse Tensor
+    Parameters
+    ----------
+    sparse_indices : relay.Expr
+        A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the
+        number of sparse values and n_dim is the number of dimensions of the dense_shape
+    prev_shape : relay.Expr
+        A 1-D tensor containing the previous shape of the dense tensor
+    new_shape : relay.Expr
+        A 1-D tensor containing the new shape of the dense tensor
+    Returns
+    -------
+    result: relay.Expr
+        Output tensor.
+    Examples
+    --------
+    .. code-block:: python
+        sparse_indices = [[0, 0, 0],
+                            [0, 0, 1],
+                            [0, 1, 0],
+                            [1, 0, 0],
+                            [1, 2, 3]]
+        prev_shape = [2, 3, 4]
+        new_shape = [9, -1]
+        new_sparse_indices, new_shape = relay.sparse_reshape(sparse_indices,
+                            prev_shape,
+                            new_shape)
+        new_sparse_indices = [[0, 0],
+                              [0, 1],
+                              [1, 2],
+                              [4, 2],
+                              [8, 1]]
+        new_shape = [9, 4]
+    """
+
+    def gen_ir(
+        sparse_indices_ptr,
+        prev_shape_ptr,
+        new_shape_ptr,
+        new_sparse_indices_ptr,
+        out_new_shape_ptr,
+    ):
+        ib = ir_builder.create()
+
+        sparse_indices = ib.buffer_ptr(sparse_indices_ptr)
+        prev_shape = ib.buffer_ptr(prev_shape_ptr)
+
+        new_shape = ib.buffer_ptr(new_shape_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+        new_sparse_indices = ib.buffer_ptr(new_sparse_indices_ptr)
+        out_new_shape = ib.buffer_ptr(out_new_shape_ptr)
+
+        prev_shape_size = prev_shape_ptr.shape[0]
+        new_shape_size = new_shape_ptr.shape[0]
+
+        multipliers = ib.allocate("int64", (prev_shape_size,), name="multipliers", scope="local")
+        dividers = ib.allocate("int64", (new_shape_size,), name="dividers", scope="local")
+        flattened_indices = ib.allocate(
+            "int64", (sparse_indices_ptr.shape[0],), name="flattened_indices", scope="local"
+        )
+        total_ele = ib.allocate("int64", (1,), name="total_ele", scope="local")
+        division_total_ele = ib.allocate("int64", (1,), name="division_total_ele", scope="local")
+        equal_shape = ib.allocate("bool", (1,), name="equal_shape", scope="local")
+        max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
+        with ib.new_scope():
+            # The computation in this block is very very miniscule since we are just iterating over
+            # shape tensors which are very small (< 10) and there is no need of parallelization
+            nthread_tx = 1
+            nthread_bx = 1
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            total_ele[0] = prev_shape[0]
+
+            # Cumulative Reverse Exclusive Multiply
+            multipliers[prev_shape_size - 1] = Cast("int64", 1)
+            with ib.for_range(0, prev_shape_size - 1) as i_:
+                i = i_ + 1
+                multipliers[prev_shape_size - 1 - i] = (
+                    prev_shape[prev_shape_size - i] * multipliers[prev_shape_size - i]
+                )
+                total_ele[0] *= prev_shape[prev_shape_size - i]
+
+            division_total_ele[0] = Cast("int64", 1)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] != -1):
+                    division_total_ele[0] *= new_shape[i]
+
+            # Compute true output shape (replace negative ones)
+            with ib.for_range(0, new_shape_size) as i:
+                with ib.if_scope(new_shape[i] == -1):
+                    out_new_shape[i] = Cast("int64", div(total_ele[0], division_total_ele[0]))
+                with ib.else_scope():
+                    out_new_shape[i] = new_shape[i]
+
+            # Check if prev_shape and new_shape are equal
+            equal_shape[0] = True
+            with ib.if_scope(prev_shape_size == new_shape_size):
+                with ib.for_range(0, prev_shape_size) as i:
+                    with ib.if_scope(prev_shape[i] != out_new_shape[i]):
+                        equal_shape[0] = False
+            with ib.else_scope():
+                equal_shape[0] = False
+
+        with ib.new_scope():
+            nthread_tx = max_threads
+            nthread_bx = ceil_div(sparse_indices_ptr.shape[0], max_threads)
+            tx = te.thread_axis("threadIdx.x")
+            bx = te.thread_axis("blockIdx.x")
+            ib.scope_attr(tx, "thread_extent", nthread_tx)
+            ib.scope_attr(bx, "thread_extent", nthread_bx)
+
+            row_number = bx * max_threads + tx
+
+            # Return same inputs if shapes are equal
+            with ib.if_scope(equal_shape[0]):
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = sparse_indices[row_number, j]
+
+            # Else compute new_sparse_indices
+            with ib.else_scope():
+                dividers[new_shape_size - 1] = Cast("int64", 1)
+                with ib.for_range(0, new_shape_size - 1) as i_:
+                    i = i_ + 1
+                    dividers[new_shape_size - 1 - i] = (
+                        dividers[new_shape_size - i] * out_new_shape[new_shape_size - i]
+                    )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    flattened_indices[row_number] = Cast("int64", 0)
+                    with ib.for_range(0, sparse_indices_ptr.shape[1]) as j:
+                        flattened_indices[row_number] += (
+                            sparse_indices[row_number, j] * multipliers[j]
+                        )
+
+                with ib.if_scope(row_number < sparse_indices_ptr.shape[0]):
+                    current_element = ib.allocate(
+                        "int64", (1,), name="current_element", scope="local"
+                    )
+                    current_element[0] = flattened_indices[row_number]
+
+                    with ib.for_range(0, new_sparse_indices_ptr.shape[1]) as j:
+                        new_sparse_indices[row_number, j] = Cast(
+                            "int64", floordiv(current_element[0], dividers[j])
+                        )
+                        current_element[0] = floormod(current_element[0], dividers[j])
+
+        return ib.get()
+
+    new_sparse_indices_buf = decl_buffer(
+        new_sparse_indices_shape, "int64", "new_sparse_indices_buf"
+    )
+    new_shape_buf = decl_buffer(new_shape_shape, "int64", "new_shape_buf")
+
+    return extern(
+        [new_sparse_indices_shape, new_shape_shape],
+        [sparse_indices, prev_shape, new_shape],
+        lambda ins, outs: gen_ir(ins[0], ins[1], ins[2], outs[0], outs[1]),
+        dtype="int64",

Review comment:
       The output dtype should match the input dtype.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org