Posted to commits@mxnet.apache.org by an...@apache.org on 2018/08/20 23:21:30 UTC

[incubator-mxnet] branch master updated: [MXNET-792] Fix for issue #9816 with dropout operator and RNG (#12091)

This is an automated email from the ASF dual-hosted git repository.

anirudh2290 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 2899715  [MXNET-792] Fix for issue #9816 with dropout operator and RNG (#12091)
2899715 is described below

commit 2899715921612ef4dd147004292b5b5d0f83320b
Author: Sam Skalicky <sa...@gmail.com>
AuthorDate: Mon Aug 20 16:21:22 2018 -0700

    [MXNET-792] Fix for issue #9816 with dropout operator and RNG (#12091)
    
    * added mshadow op for threshold_eq (threshold currently does <, this will do <=)
    
    modified dropout operator to use threshold_eq instead of threshold; this will ensure equivalent behavior for the random numbers generated on CPU [0, 1) and GPU (0, 1]
    
    removed fixed seed for test_dropout
    
    * removed comment about flaky test
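
The failure mode behind issue #9816 can be reproduced in isolation: with dropout ratio 0 (pkeep = 1.0), a GPU-style generator sampling uniform on (0, 1] can return exactly 1.0, and the strict comparison in threshold then zeroes an element that should always be kept. The standalone C++ sketch below mimics the two mshadow_op operators with plain functions; the function bodies are re-implementations for illustration, not the MXNet code itself:

    // Standalone sketch (not the MXNet source): shows why the strict
    // comparison in `threshold` drops a sample that should be kept when the
    // RNG range is (0, 1] and pkeep == 1.0 (dropout ratio 0), the failure
    // reported in issue #9816.
    #include <cstdio>

    // threshold:    1 if a <  b, else 0  (old behavior)
    float threshold(float a, float b)    { return a <  b ? 1.0f : 0.0f; }
    // threshold_eq: 1 if a <= b, else 0  (new behavior)
    float threshold_eq(float a, float b) { return a <= b ? 1.0f : 0.0f; }

    int main() {
      const float pkeep = 1.0f;     // dropout ratio 0: every element should be kept
      // A GPU-style generator on (0, 1] can return exactly 1.0, while a
      // CPU-style generator on [0, 1) never does.
      const float rand_num = 1.0f;

      float mask_lt = threshold(rand_num, pkeep)    * (1.0f / pkeep);  // 0.0: wrongly dropped
      float mask_le = threshold_eq(rand_num, pkeep) * (1.0f / pkeep);  // 1.0: correctly kept

      std::printf("threshold:    mask = %.1f\n", mask_lt);
      std::printf("threshold_eq: mask = %.1f\n", mask_le);
      return 0;
    }
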
---
 src/operator/mshadow_op.h              | 1 +
 src/operator/nn/dropout-inl.h          | 3 ++-
 tests/python/unittest/test_operator.py | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 3397193..06a223d 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -275,6 +275,7 @@ MXNET_UNARY_MATH_OP(square_grad, 2.0f * math::id(a));
 
 /*! \brief used for generate Bernoulli mask */
 MXNET_BINARY_MATH_OP_NC(threshold, a < b ? DType(1) : DType(0));
+MXNET_BINARY_MATH_OP_NC(threshold_eq, a <= b ? DType(1) : DType(0));
 
 /*! \brief used for generate element of abs */
 MXNET_UNARY_MATH_OP(abs, math::fabs(a)); // NOLINT(*)
diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h
index 8e4aac6..b7c40fb 100644
--- a/src/operator/nn/dropout-inl.h
+++ b/src/operator/nn/dropout-inl.h
@@ -206,7 +206,7 @@ class DropoutOp {
                                     const real_t pkeep) {
       RNG_KERNEL_LOOP(xpu, DType, id, gen, N, step, {
         const real_t rand_num = static_cast<real_t>(genImpl.uniform());
-        mask_out[i] = mshadow_op::threshold::Map<real_t>(rand_num, pkeep) * (1.0f / pkeep);
+        mask_out[i] = mshadow_op::threshold_eq::Map<real_t>(rand_num, pkeep) * (1.0f / pkeep);
         dropout_out[i] = input_data[i] * mask_out[i];
       });
     }
@@ -258,6 +258,7 @@ class DropoutOp {
                                         this->pkeep_);
             return;
           }
+
           // initialize the mask
           LaunchRNG<BernoulliKernel, xpu>(s, pgen, mask.Size(),
                                           mask.dptr<DType>(),
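
For context on the kernel changed above: the mask is a Bernoulli draw scaled by 1/pkeep (inverted dropout), so the expected value of each output element equals its input and no rescaling is needed at inference time. A rough standalone C++ sketch of that computation, using std::mt19937 in place of MXNet's parallel RNG (so it differs from the real RNG_KERNEL_LOOP machinery), might look like:

    // Rough sketch of the inverted-dropout computation done per element by
    // BernoulliKernel; plain C++ with a sequential RNG for illustration only.
    #include <cstdio>
    #include <random>

    int main() {
      const float pkeep = 0.7f;                 // keep probability (1 - dropout ratio)
      float input[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      float mask[8], output[8];

      std::mt19937 gen(0);
      std::uniform_real_distribution<float> uniform(0.0f, 1.0f);  // samples [0, 1)

      for (int i = 0; i < 8; ++i) {
        const float rand_num = uniform(gen);
        // Keep the element iff rand_num <= pkeep; scale by 1/pkeep so that
        // the expected value of output[i] equals input[i].
        mask[i] = (rand_num <= pkeep ? 1.0f : 0.0f) * (1.0f / pkeep);
        output[i] = input[i] * mask[i];
        std::printf("in=%.1f mask=%.2f out=%.2f\n", input[i], mask[i], output[i]);
      }
      return 0;
    }
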
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 125666b..0ff9a10 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -5722,8 +5722,7 @@ def test_stack():
         check_numeric_gradient(out, inputs)
 
 
-# test fails with seed 990952066: 0 output seen with dropout ratio=0. See issue #9816
-@with_seed(1234)
+@with_seed()
 def test_dropout():
     def zero_count(array, ratio):
         zeros = 0
@@ -5775,6 +5774,7 @@ def test_dropout():
 
         exe.arg_arrays[0][:] = 1
         exe.forward(is_train=True)
+
         if not math.isnan(max_value):
             assert exe.outputs[0].asnumpy().max() > 0
         else: