You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by sx...@apache.org on 2020/08/31 05:10:05 UTC
[incubator-mxnet] branch master updated: Fix fusion of clip if a_min or a_max are not given (#19035)

This is an automated email from the ASF dual-hosted git repository.

sxjscience pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new e2aacce  Fix fusion of clip if a_min or a_max are not given (#19035)
e2aacce is described below

commit e2aacce02b2e09d2bed832810f4aade4d750afcb
Author: Przemyslaw Tredak <pt...@nvidia.com>
AuthorDate: Sun Aug 30 22:08:18 2020 -0700

    Fix fusion of clip if a_min or a_max are not given (#19035)
---
 src/common/cuda/rtc/forward_functions-inl.h | 8 +++++++-
 src/common/cuda/rtc/special_functions-inl.h | 3 +++
 tests/python/gpu/test_fusion.py             | 3 +++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index 3c64a92..4575980 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -835,7 +835,13 @@ __device__ inline DType trunc(const DType val) {
 
 template <typename DType>
 __device__ inline DType clip(const DType val, const float a_min, const float a_max) {
-  return max(min(val, a_max), a_min);
+  if (val > a_max) {
+    return a_max;
+  } else if (val < a_min) {
+    return a_min;
+  } else {
+    return val;
+  }
 }
 
 template <typename DType>
diff --git a/src/common/cuda/rtc/special_functions-inl.h b/src/common/cuda/rtc/special_functions-inl.h
index 50f8604..d64afb5 100644
--- a/src/common/cuda/rtc/special_functions-inl.h
+++ b/src/common/cuda/rtc/special_functions-inl.h
@@ -51,6 +51,9 @@ namespace rtc {
 //
 const char special_functions_definitions[] = R"code(
 constexpr double DBL_MAX = 1.7976931348623157081e+308;
+constexpr float FLT_MAX = 3.4028234663852885981e+38;
+#define inf ((float)1e50)
+#define nan (inf - inf)
 
 namespace op {
 
diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index 6b98130..2a07897 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -160,6 +160,9 @@ def check_unary_ops():
     # clip requires a_min, a_max
     announce_check('clip')
     check_fused_symbol(mx.sym.clip(a, a_min=0.3, a_max=0.7), a=arr)
+    check_fused_symbol(mx.sym.clip(a, a_min=-np.inf, a_max=0.7), a=arr)
+    check_fused_symbol(mx.sym.clip(a, a_min=-np.inf, a_max=np.inf), a=arr)
+    check_fused_symbol(mx.sym.clip(a, a_min=0, a_max=np.nan), a=arr)
 
     # smooth_l1 requires a scalar
     announce_check('smooth_l1')