Posted to commits@tvm.apache.org by ma...@apache.org on 2021/06/18 07:17:01 UTC
[tvm] branch main updated: [topi][CuDNN] Removed requirement for GPU from topi conv2d_cudnn.cuda and conv3d_cudnn.cuda (#8276)
This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new bf3f000 [topi][CuDNN] Removed requirement for GPU from topi conv2d_cudnn.cuda and conv3d_cudnn.cuda (#8276)
bf3f000 is described below
commit bf3f000f8567ae0dd9a34e6f9f7d1a5c8d804849
Author: Lunderberg <Lu...@users.noreply.github.com>
AuthorDate: Fri Jun 18 00:16:45 2021 -0700
[topi][CuDNN] Removed requirement for GPU from topi conv2d_cudnn.cuda and conv3d_cudnn.cuda (#8276)
Previously, `conv2d_cudnn.cuda` would use cudnn's benchmarking
function to select a forward convolution algorithm when
`cfg.is_fallback`, while `conv3d_cudnn.cuda` would use cudnn's
benchmarking at all times.
After this commit, both expose the cudnn algorithm choice as a
tunable option. If `cfg.is_fallback`, the local device is
benchmarked when one is present; otherwise, a default cudnn
algorithm is selected.
In the future, to better support RPC use-cases, the fallback config
should be based on cudnn-specific parameters saved in the Target
object.
Co-authored-by: Eric Lunderberg <el...@octoml.ai>
---
python/tvm/contrib/cudnn.py | 18 ++++++++++++++++++
python/tvm/topi/cuda/conv2d.py | 12 +++++++++---
python/tvm/topi/cuda/conv3d.py | 12 +++++++++++-
src/runtime/contrib/cudnn/cudnn_utils.cc | 32 +++++++++++++++++++++++++++++---
src/runtime/contrib/cudnn/cudnn_utils.h | 5 ++++-
5 files changed, 71 insertions(+), 8 deletions(-)
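To make the new behavior concrete before the diff, here is a minimal sketch of the fallback selection, assuming only the public `tvm.contrib.cudnn` module; the standalone helper name `_pick_fallback_algo` is hypothetical, since the real logic lives inline in `conv2d_cudnn` and `conv3d_cudnn`:

    from tvm.autotvm.task.space import OtherOptionEntity
    from tvm.contrib import cudnn

    def _pick_fallback_algo(cfg):
        # Hypothetical helper mirroring the inline pattern added below.
        if cudnn.exists():
            # A usable CuDNN build and GPU are present: -1 tells cuDNN to
            # benchmark the candidate algorithms locally and pick one.
            cfg["algo"] = OtherOptionEntity(-1)
        else:
            # No local CuDNN (e.g. compiling for a remote RPC device):
            # pin a default algorithm index instead of failing.
            cfg["algo"] = OtherOptionEntity(0)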
diff --git a/python/tvm/contrib/cudnn.py b/python/tvm/contrib/cudnn.py
index 0e22e0c..ac3835e 100644
--- a/python/tvm/contrib/cudnn.py
+++ b/python/tvm/contrib/cudnn.py
@@ -64,6 +64,24 @@ _BWD_DATA_ALGOS = [
 _ALGO_TYPE = ["fwd", "bwd_filter", "bwd_data"]


+def exists():
+    """
+    Checks whether the local machine can use CuDNN.
+
+    Returns
+    -------
+    exists: bool
+
+        True if CuDNN support is enabled and a CuDNN-capable GPU
+        exists. Otherwise, False.
+    """
+    func = tvm.get_global_func("tvm.contrib.cudnn.exists", allow_missing=True)
+    if func is None:
+        return False
+
+    return bool(func())
+
+
 def algo_to_index(algo_type, algo_name):
     """Return an index representing the algorithm, which can be used
     when calling CuDNN functions
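A quick usage sketch of the new helper; on builds compiled without CuDNN the registered global is missing, so `exists()` simply returns False rather than raising:

    from tvm.contrib import cudnn

    if cudnn.exists():
        print("CuDNN usable: fallback configs can benchmark locally")
    else:
        print("CuDNN unavailable: fallback configs pin a default algorithm")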
diff --git a/python/tvm/topi/cuda/conv2d.py b/python/tvm/topi/cuda/conv2d.py
index 63c7c93..a199534 100644
--- a/python/tvm/topi/cuda/conv2d.py
+++ b/python/tvm/topi/cuda/conv2d.py
@@ -117,9 +117,15 @@ def conv2d_cudnn(
     else:
         dtype = data.dtype

-    cfg.define_knob("algo", range(8))
-    if cfg.is_fallback:  # Let CUDNN choose the best algo
-        cfg["algo"] = OtherOptionEntity(-1)
+    cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
+    if cfg.is_fallback:
+        if cudnn.exists():
+            # Let CUDNN choose the best algo, based on benchmarks run
+            # on the local machine. In the future, this should be
+            # based on parameters stored in the Target.
+            cfg["algo"] = OtherOptionEntity(-1)
+        else:
+            cfg["algo"] = OtherOptionEntity(0)

     return cudnn.conv_forward(
         data,
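The knob's upper bound comes from the `_COUNT` sentinel: `algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")` returns that sentinel's index, which equals the number of concrete forward algorithms. A small sketch; with current cuDNN enum definitions the value is 8, matching the old hard-coded `range(8)`:

    from tvm.contrib import cudnn

    # Index of the _COUNT sentinel == number of concrete fwd algorithms.
    n_fwd_algos = cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")
    print(list(range(n_fwd_algos)))  # [0, 1, ..., 7] with current cuDNN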
diff --git a/python/tvm/topi/cuda/conv3d.py b/python/tvm/topi/cuda/conv3d.py
index 530df31..51f1f7a 100644
--- a/python/tvm/topi/cuda/conv3d.py
+++ b/python/tvm/topi/cuda/conv3d.py
@@ -221,6 +221,16 @@ def conv3d_cudnn(
         * ((KW - 1) * dilation_w + 1)
     )

+    cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
+    if cfg.is_fallback:
+        if cudnn.exists():
+            # Let CUDNN choose the best algo, based on benchmarks run
+            # on the local machine. In the future, this should be
+            # based on parameters stored in the Target.
+            cfg["algo"] = OtherOptionEntity(-1)
+        else:
+            cfg["algo"] = OtherOptionEntity(0)
+
     return cudnn.conv_forward(
         data,
         kernel,
@@ -229,7 +239,7 @@ def conv3d_cudnn(
         [dilation_d, dilation_h, dilation_w],
         conv_mode=1,
         tensor_format=tensor_format,
-        algo=-1,  # let CUDNN choose the best algo
+        algo=cfg["algo"].val,
         conv_dtype=dtype,
     )
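With the knob in place, AutoTVM can sweep each cuDNN algorithm during tuning instead of always deferring to cuDNN's own choice; the chosen entity's `.val` is what flows into `algo=` above. An illustrative sketch of the entity itself:

    from tvm.autotvm.task.space import OtherOptionEntity

    # e.g. the tuner trying forward algorithm index 3; -1 would instead
    # defer the choice to cuDNN's local benchmarking.
    entity = OtherOptionEntity(3)
    print(entity.val)  # -> 3, passed as algo=cfg["algo"].val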
diff --git a/src/runtime/contrib/cudnn/cudnn_utils.cc b/src/runtime/contrib/cudnn/cudnn_utils.cc
index da67c2e..a320c92 100644
--- a/src/runtime/contrib/cudnn/cudnn_utils.cc
+++ b/src/runtime/contrib/cudnn/cudnn_utils.cc
@@ -99,16 +99,38 @@ CuDNNThreadEntry::CuDNNThreadEntry() {
   auto func = runtime::Registry::Get("device_api.cuda");
   void* ret = (*func)();
   cuda_api = static_cast<runtime::DeviceAPI*>(ret);
-  CUDNN_CALL(cudnnCreate(&handle));
+
+  // If no CuDNN-capable device is present, allow the CuDNNThreadEntry
+  // object to be created. This is needed for
+  // CuDNNThreadEntry::exists.
+  {
+    cudnnStatus_t create_res = cudnnCreate(&handle);
+    if (create_res == CUDNN_STATUS_NOT_INITIALIZED) {
+      return;
+    }
+    CUDNN_CALL(create_res);
+  }
+
   CUDNN_CALL(cudnnSetStream(handle, stream));
   conv_entry.cuda_api = cuda_api;
 }

-CuDNNThreadEntry::~CuDNNThreadEntry() { CUDNN_CALL(cudnnDestroy(handle)); }
+CuDNNThreadEntry::~CuDNNThreadEntry() {
+  if (handle) {
+    CUDNN_CALL(cudnnDestroy(handle));
+  }
+}

 typedef dmlc::ThreadLocalStore<CuDNNThreadEntry> CuDNNThreadStore;

-CuDNNThreadEntry* CuDNNThreadEntry::ThreadLocal() { return CuDNNThreadStore::Get(); }
+CuDNNThreadEntry* CuDNNThreadEntry::ThreadLocal(bool check_exists) {
+  auto* res = CuDNNThreadStore::Get();
+  if (check_exists) {
+    ICHECK(res->exists()) << "CUDNN_STATUS_NOT_INITIALIZED";
+  }
+
+  return res;
+}
// ConvEntry
@@ -148,5 +170,9 @@ SoftmaxEntry::SoftmaxEntry() { CUDNN_CALL(cudnnCreateTensorDescriptor(&shape_des
 SoftmaxEntry::~SoftmaxEntry() { CUDNN_CALL(cudnnDestroyTensorDescriptor(shape_desc)); }

+TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.exists").set_body_typed([]() -> bool {
+  return CuDNNThreadEntry::ThreadLocal(false)->exists();
+});
+
 }  // namespace contrib
 }  // namespace tvm
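The registered global can also be looked up directly from Python, which is exactly what the `exists()` wrapper earlier in this commit does; `allow_missing=True` keeps the lookup safe on builds without CuDNN. A minimal sketch:

    import tvm

    func = tvm.get_global_func("tvm.contrib.cudnn.exists", allow_missing=True)
    print(bool(func()) if func is not None else False)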
diff --git a/src/runtime/contrib/cudnn/cudnn_utils.h b/src/runtime/contrib/cudnn/cudnn_utils.h
index 72380b6..01b92d6 100644
--- a/src/runtime/contrib/cudnn/cudnn_utils.h
+++ b/src/runtime/contrib/cudnn/cudnn_utils.h
@@ -93,11 +93,14 @@ struct SoftmaxEntry {

 struct CuDNNThreadEntry {
   CuDNNThreadEntry();
   ~CuDNNThreadEntry();
+
+  bool exists() const { return handle; }
+
   cudnnHandle_t handle{nullptr};
   ConvEntry conv_entry;
   SoftmaxEntry softmax_entry;
   runtime::DeviceAPI* cuda_api{nullptr};
-  static CuDNNThreadEntry* ThreadLocal();
+  static CuDNNThreadEntry* ThreadLocal(bool check_exists = true);
 };  // CuDNNThreadEntry
} // namespace contrib