You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by sx...@apache.org on 2020/08/31 05:10:05 UTC
[incubator-mxnet] branch master updated: Fix fusion of clip if a_min or a_max are not given (#19035)
This is an automated email from the ASF dual-hosted git repository.
sxjscience pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new e2aacce Fix fusion of clip if a_min or a_max are not given (#19035)
e2aacce is described below
commit e2aacce02b2e09d2bed832810f4aade4d750afcb
Author: Przemyslaw Tredak <pt...@nvidia.com>
AuthorDate: Sun Aug 30 22:08:18 2020 -0700
Fix fusion of clip if a_min or a_max are not given (#19035)
---
src/common/cuda/rtc/forward_functions-inl.h | 8 +++++++-
src/common/cuda/rtc/special_functions-inl.h | 3 +++
tests/python/gpu/test_fusion.py | 3 +++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index 3c64a92..4575980 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -835,7 +835,13 @@ __device__ inline DType trunc(const DType val) {
template <typename DType>
__device__ inline DType clip(const DType val, const float a_min, const float a_max) {
- return max(min(val, a_max), a_min);
+ if (val > a_max) {
+ return a_max;
+ } else if (val < a_min) {
+ return a_min;
+ } else {
+ return val;
+ }
}
template <typename DType>
diff --git a/src/common/cuda/rtc/special_functions-inl.h b/src/common/cuda/rtc/special_functions-inl.h
index 50f8604..d64afb5 100644
--- a/src/common/cuda/rtc/special_functions-inl.h
+++ b/src/common/cuda/rtc/special_functions-inl.h
@@ -51,6 +51,9 @@ namespace rtc {
//
const char special_functions_definitions[] = R"code(
constexpr double DBL_MAX = 1.7976931348623157081e+308;
+constexpr float FLT_MAX = 3.4028234663852885981e+38;
+#define inf ((float)1e50)
+#define nan (inf - inf)
namespace op {
diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index 6b98130..2a07897 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -160,6 +160,9 @@ def check_unary_ops():
# clip requires a_min, a_max
announce_check('clip')
check_fused_symbol(mx.sym.clip(a, a_min=0.3, a_max=0.7), a=arr)
+ check_fused_symbol(mx.sym.clip(a, a_min=-np.inf, a_max=0.7), a=arr)
+ check_fused_symbol(mx.sym.clip(a, a_min=-np.inf, a_max=np.inf), a=arr)
+ check_fused_symbol(mx.sym.clip(a, a_min=0, a_max=np.nan), a=arr)
# smooth_l1 requires a scalar
announce_check('smooth_l1')