You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ma...@apache.org on 2021/01/14 07:07:18 UTC
[tvm] branch main updated: [CUDA] [Codegen] Ensuring at least one
thread block for dynamism (#7273)
This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 8d3c0e7 [CUDA] [Codegen] Ensuring at least one thread block for dynamism (#7273)
8d3c0e7 is described below
commit 8d3c0e79a4cd449e894ae873fa8244f29b6b13a3
Author: Animesh Jain <an...@umich.edu>
AuthorDate: Wed Jan 13 23:07:01 2021 -0800
[CUDA] [Codegen] Ensuring at least one thread block for dynamism (#7273)
---
src/runtime/thread_storage_scope.h | 6 +++++-
tests/python/relay/test_any.py | 9 ++++++---
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/runtime/thread_storage_scope.h b/src/runtime/thread_storage_scope.h
index 1917096..c039360 100644
--- a/src/runtime/thread_storage_scope.h
+++ b/src/runtime/thread_storage_scope.h
@@ -215,7 +215,11 @@ class ThreadAxisConfig {
ThreadWorkLoad w;
std::fill(w.work_size, w.work_size + 6, 1);
for (size_t i = 0; i < arg_index_map_.size(); ++i) {
- w.work_size[arg_index_map_[i]] = static_cast<size_t>(x.values[base_ + i].v_int64);
+ // Dynamic shapes can result in 0 dim size. Guard to ensure that the dim size is atleast 1.
+ size_t size = static_cast<size_t>(x.values[base_ + i].v_int64);
+ if (size > 0) {
+ w.work_size[arg_index_map_[i]] = size;
+ }
}
return w;
}
diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py
index cb3b5d4..d30e787 100644
--- a/tests/python/relay/test_any.py
+++ b/tests/python/relay/test_any.py
@@ -845,7 +845,7 @@ def test_any_softmax():
verify_any_softmax(any_dims(4), 2, (13, 11, 3, 1), (13, 11, 3, 1))
-def verify_any_topk(data_shape, kval, np_dshape, dtype, const_k=False):
+def verify_any_topk(data_shape, kval, np_dshape, dtype, ret_type="indices", const_k=False):
mod = tvm.IRModule()
data = relay.var("data", shape=data_shape, dtype=dtype)
np_data = np.random.uniform(size=np_dshape).astype(dtype)
@@ -857,7 +857,9 @@ def verify_any_topk(data_shape, kval, np_dshape, dtype, const_k=False):
k = relay.var("k", shape=(), dtype="int32")
args = [data, k]
in_vals = [np_data, kval]
- out = relay.topk(data, k, ret_type="indices")
+ out = relay.topk(data, k, ret_type=ret_type)
+ if ret_type == "both":
+ out = out[0]
mod["main"] = relay.Function(args, out)
sorted = np.argsort(-np_data)
@@ -873,7 +875,8 @@ def verify_any_topk(data_shape, kval, np_dshape, dtype, const_k=False):
def test_any_topk():
verify_any_topk(any_dims(1), 5, (10,), "float32")
verify_any_topk(any_dims(2), 2, (6, 3), "int32")
- verify_any_topk(any_dims(2), 3, (6, 3), "float32", True)
+ verify_any_topk(any_dims(2), 3, (6, 3), "float32", const_k=True)
+ verify_any_topk(any_dims(1), 0, (0,), "float32", ret_type="both")
@tvm.testing.uses_gpu