You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2023/01/26 23:54:41 UTC
[tvm] branch main updated: [ROCM] Fixes compiling on ROCM 5 and accuracy on dense op (#13847)
This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new ec72ac6690 [ROCM] Fixes compiling on ROCM 5 and accuracy on dense op (#13847)
ec72ac6690 is described below
commit ec72ac66900b0a1a661fcccff1f48ca48b5d5739
Author: masahi <ma...@gmail.com>
AuthorDate: Fri Jan 27 08:54:33 2023 +0900
[ROCM] Fixes compiling on ROCM 5 and accuracy on dense op (#13847)
Fixes https://github.com/apache/tvm/issues/13666
* Some bitcode files need to be updated
* There is a strange, device-dependent accuracy issue when using the default topi `dense` op schedule on AMDGPU (see the issue above). I confirmed that the other schedule works fine.
---
python/tvm/contrib/rocm.py | 4 ++--
python/tvm/relay/op/strategy/cuda.py | 15 +++++++++------
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/python/tvm/contrib/rocm.py b/python/tvm/contrib/rocm.py
index 4f62f1a8da..372281dbab 100644
--- a/python/tvm/contrib/rocm.py
+++ b/python/tvm/contrib/rocm.py
@@ -141,9 +141,7 @@ def callback_rocm_bitcode_path(rocdl_dir=None):
bitcode_names = [
"oclc_daz_opt_on",
"ocml",
- "hc",
"irif", # this does not exist in rocm 3.9, drop eventually
- "ockl",
"oclc_correctly_rounded_sqrt_off",
"oclc_correctly_rounded_sqrt_on",
"oclc_daz_opt_off",
@@ -152,9 +150,11 @@ def callback_rocm_bitcode_path(rocdl_dir=None):
"oclc_isa_version_803", # todo (t-vi): an alternative might be to scan for the
"oclc_isa_version_900", # isa version files (if the linker throws out
"oclc_isa_version_906", # the unneeded ones or we filter for the arch we need)
+ "oclc_isa_version_1030",
"oclc_unsafe_math_off",
"oclc_unsafe_math_on",
"oclc_wavefrontsize64_on",
+ "oclc_abi_version_500",
]
bitcode_files = []
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index cc43809266..fa295c93a1 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -918,13 +918,16 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
name="dense_int8.cuda",
)
else:
- strategy.add_implementation(
- wrap_compute_dense(topi.gpu.dense_small_batch),
- wrap_topi_schedule(topi.gpu.schedule_dense_small_batch),
- name="dense_small_batch.gpu",
- )
+ # Some AMDGPU cards have accuracy issues with this schedule
+ # See https://github.com/apache/tvm/issues/13666
+ if target.kind.name != "rocm":
+ strategy.add_implementation(
+ wrap_compute_dense(topi.gpu.dense_small_batch),
+ wrap_topi_schedule(topi.gpu.schedule_dense_small_batch),
+ name="dense_small_batch.gpu",
+ )
- with SpecializedCondition(b >= 32):
+ with SpecializedCondition(target.kind.name == "rocm" or b >= 32):
strategy.add_implementation(
wrap_compute_dense(topi.gpu.dense_large_batch),
wrap_topi_schedule(topi.gpu.schedule_dense_large_batch),