You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ex...@apache.org on 2023/06/13 06:50:06 UTC
[tvm] branch unity updated: [Bugfix][CUDA] Fix codegen for hexp for sm >= 52 (#15079)

This is an automated email from the ASF dual-hosted git repository.

expye pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/unity by this push:
     new 208dd71f18 [Bugfix][CUDA] Fix codegen for hexp for sm >= 52 (#15079)
208dd71f18 is described below

commit 208dd71f1811e60e223a05c94659faaaa8321f9c
Author: Junru Shao <ju...@apache.org>
AuthorDate: Mon Jun 12 23:50:00 2023 -0700

    [Bugfix][CUDA] Fix codegen for hexp for sm >= 52 (#15079)
    
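    Fix a minor bug introduced by #15070: the fp32-based `hexp` fallback is now
    emitted only when __CUDA_ARCH__ is defined and below 530, while the
    hpow/htanh/htan/hatan/herf fallbacks remain emitted for __CUDA_ARCH__ >= 530.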
    
    Co-authored-by: Ubuntu <ub...@ubuntu.com>
---
 src/target/source/literal/cuda_half_t.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/target/source/literal/cuda_half_t.h b/src/target/source/literal/cuda_half_t.h
index 518c2be643..67471daf82 100644
--- a/src/target/source/literal/cuda_half_t.h
+++ b/src/target/source/literal/cuda_half_t.h
@@ -294,10 +294,6 @@ __pack_half2(const half x, const half y) {
   return (v1 << 16) | v0;
 }
 
-// Some fp16 math functions are not supported in cuda_fp16.h,
-// so we define them here to make sure the generated CUDA code
-// is valid.
-#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
 #define CUDA_UNSUPPORTED_HALF_MATH_BINARY(HALF_MATH_NAME, FP32_MATH_NAME) \
 static inline __device__ __host__ half HALF_MATH_NAME(half x, half y) {   \
   float tmp_x = __half2float(x);                                          \
@@ -313,17 +309,23 @@ static inline __device__ __host__ half HALF_MATH_NAME(half x) {          \
   return __float2half(result);                                           \
 }
 
+// Some fp16 math functions are not supported in cuda_fp16.h,
+// so we define them here to make sure the generated CUDA code
+// is valid.
+#if defined(__CUDA_ARCH__)
+#if (__CUDA_ARCH__ >= 530)
 CUDA_UNSUPPORTED_HALF_MATH_BINARY(hpow, powf)
 CUDA_UNSUPPORTED_HALF_MATH_UNARY(htanh, tanhf)
 CUDA_UNSUPPORTED_HALF_MATH_UNARY(htan, tanf)
 CUDA_UNSUPPORTED_HALF_MATH_UNARY(hatan, atanf)
 CUDA_UNSUPPORTED_HALF_MATH_UNARY(herf, erf)
+#else
 CUDA_UNSUPPORTED_HALF_MATH_UNARY(hexp, exp)
+#endif
+#endif
 
 #undef CUDA_UNSUPPORTED_HALF_MATH_BINARY
 #undef CUDA_UNSUPPORTED_HALF_MATH_UNARY
-
-#endif
 )";
 
 static constexpr const char* _cuda_bfloat16_util = R"(