Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/10/01 03:29:22 UTC

[GitHub] [incubator-tvm] vinx13 commented on a change in pull request #6580: Faster sparse_dense on GPUs

vinx13 commented on a change in pull request #6580:
URL: https://github.com/apache/incubator-tvm/pull/6580#discussion_r497701322



##########
File path: tests/python/topi/python/test_topi_sparse.py
##########
@@ -342,38 +343,29 @@ def verify_sparse_dense_bsr(M, N, K, BS_R, BS_C, density, use_relu):
     W_indptr = te.placeholder(shape=W_sp_np.indptr.shape, dtype=str(W_sp_np.indptr.dtype))
     X = te.placeholder(shape=X_np.shape, dtype=str(X_np.dtype))
 
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        fcompute, fschedule = tvm.topi.testing.dispatch(device, _sparse_dense_implement)
-        with tvm.target.Target(device):
-            Y = fcompute(X, W_data, W_indices, W_indptr)
-            if use_relu:
-                Y = topi.nn.relu(Y)
-            s = fschedule([Y])
-            func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y])
-            Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype), ctx=ctx)
-            func(
-                tvm.nd.array(X_np, ctx=ctx),
-                tvm.nd.array(W_sp_np.data, ctx=ctx),
-                tvm.nd.array(W_sp_np.indices, ctx=ctx),
-                tvm.nd.array(W_sp_np.indptr, ctx=ctx),
-                Y_tvm,
-            )
-            tvm.testing.assert_allclose(Y_tvm.asnumpy(), Y_np, atol=1e-4, rtol=1e-4)
-
-    for device in ["llvm", "cuda"]:
-        check_device(device)
+    fcompute, fschedule = tvm.topi.testing.dispatch(target, _sparse_dense_implement)
+    with tvm.target.Target(target):
+        Y = fcompute(X, W_data, W_indices, W_indptr)
+        if use_relu:
+            Y = topi.nn.relu(Y)
+        s = fschedule([Y])
+        func = tvm.build(s, [X, W_data, W_indices, W_indptr, Y])
+        Y_tvm = tvm.nd.array(np.zeros(Y_np.shape, dtype=Y_np.dtype), ctx=ctx)
+        func(
+            tvm.nd.array(X_np, ctx=ctx),
+            tvm.nd.array(W_sp_np.data, ctx=ctx),
+            tvm.nd.array(W_sp_np.indices, ctx=ctx),
+            tvm.nd.array(W_sp_np.indptr, ctx=ctx),
+            Y_tvm,
+        )
+        tvm.testing.assert_allclose(Y_tvm.asnumpy(), Y_np, atol=1e-4, rtol=1e-4)
 
 
-@tvm.testing.uses_gpu
-def test_sparse_dense_bsr():
+@tvm.testing.parametrize_targets("llvm", "cuda")
+def test_sparse_dense_bsr_relu(ctx, target):

Review comment:
       enable this test?
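
For context on the decorator introduced by this hunk, here is a minimal sketch of how `tvm.testing.parametrize_targets` replaces a manual check_device loop (a hypothetical test assuming the TVM 0.7-era testing API, not code from this PR): pytest generates one case per listed target, and targets whose backends are disabled in the build are reported as skipped instead of being silently passed over with a print-and-return.

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import te

    @tvm.testing.parametrize_targets("llvm", "cuda")
    def test_add_one(ctx, target):
        # pytest runs this once per target; disabled targets show as skips.
        n = 16
        A = te.placeholder((n,), name="A")
        B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
        s = te.create_schedule(B.op)
        if "gpu" in tvm.target.Target(target).keys:
            # GPU targets require the axis to be bound to a thread before build.
            s[B].bind(B.op.axis[0], te.thread_axis("threadIdx.x"))
        func = tvm.build(s, [A, B], target)
        a = tvm.nd.array(np.arange(n, dtype="float32"), ctx=ctx)
        b = tvm.nd.array(np.zeros(n, dtype="float32"), ctx=ctx)
        func(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1.0)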

##########
File path: src/relay/op/nn/sparse.cc
##########
@@ -82,6 +82,35 @@ RELAY_REGISTER_OP("nn.sparse_dense")
 - **weight**: `(units, input_dim)`
 - **out**: `(x1, x2, ..., xn, units)`.
 
+)code" TVM_ADD_FILELINE)
+    .set_attrs_type<SparseDenseAttrs>()
+    .set_num_inputs(4)
+    .add_argument("data", "nD Tensor", "Input data.")
+    .add_argument("weight_data", "1D Tensor", "Weight data matrix.")
+    .add_argument("weight_indices", "1D Tensor", "Weight indices matrix.")
+    .add_argument("weight_indptr", "1D Tensor", "Weight indptr matrix.")
+    .set_support_level(1)
+    .add_type_rel("SparseDense", SparseDenseRel);
+
+Expr MakeSparseDensePadded(Expr data, Expr weight_data, Expr weight_indices, Expr weight_indptr) {
+  auto attrs = make_object<SparseDenseAttrs>();
+  static const Op& op = Op::Get("nn.sparse_dense_padded");
+  return Call(op, {data, weight_data, weight_indices, weight_indptr}, Attrs(attrs), {});
+}
+
+TVM_REGISTER_GLOBAL("relay.op.nn._make.sparse_dense_padded")
+    .set_body([](const TVMArgs& args, TVMRetValue* rv) {
+      runtime::detail::unpack_call<Expr, 4>(MakeSparseDensePadded, args, rv);
+    });
+
+RELAY_REGISTER_OP("nn.sparse_dense_padded")
+    .describe(
+        R"code(Applies a sparse linear transformation: :math:`Y = XW^T` with X sparse. This variation uses a matrix with row lengths padded to a multiple of 32 for better GPU performance.

Review comment:
       with W sparse?
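
To make the comment concrete: nn.sparse_dense multiplies the dense input X by the transpose of the sparse weight W, so the docstring should say W (not X) is sparse. A numpy/scipy sketch of the reference semantics (shapes hypothetical), mirroring the `Y_np = X_np @ W_sp_np.toarray().T` style of reference used in the test file above:

    import numpy as np
    import scipy.sparse as sp

    M, N, K, BS_R, BS_C = 8, 16, 12, 4, 4  # hypothetical shapes
    X = np.random.rand(M, K).astype("float32")  # dense input
    # Sparse weight in BSR form; its data/indices/indptr arrays are what the
    # op's weight_data/weight_indices/weight_indptr arguments carry.
    W = sp.random(N, K, density=0.25, dtype="float32").tobsr(blocksize=(BS_R, BS_C))

    # Reference semantics: Y = X * W^T, with W stored sparse.
    Y = X @ W.toarray().T
    assert Y.shape == (M, N)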




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org