Posted to commits@mxnet.apache.org by sx...@apache.org on 2018/03/08 20:46:52 UTC

[incubator-mxnet] branch master updated: add axes support for dropouts in gluon (#10032)

This is an automated email from the ASF dual-hosted git repository.

sxjscience pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 649b086  add axes support for dropouts in gluon (#10032)
649b086 is described below

commit 649b08665bad016a71fa8b7a29a184d25217e335
Author: Sheng Zha <sz...@users.noreply.github.com>
AuthorDate: Thu Mar 8 15:46:47 2018 -0500

    add axes support for dropouts in gluon (#10032)
---
 python/mxnet/gluon/contrib/rnn/rnn_cell.py  |  8 ++----
 python/mxnet/gluon/nn/basic_layers.py       |  9 ++++---
 python/mxnet/gluon/rnn/rnn_cell.py          | 14 ++++++----
 tests/python/unittest/test_gluon.py         | 40 +++++++++++++++++++++++++++++
 tests/python/unittest/test_gluon_contrib.py |  3 ---
 tests/python/unittest/test_operator.py      | 29 +++++++++++----------
 6 files changed, 72 insertions(+), 31 deletions(-)
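
Before the per-file changes, a minimal usage sketch of what this commit
enables (the layer name and the rate/axes arguments come from the patch
below; the shapes, the rate value, and the train-mode scoping are
illustrative assumptions, not part of the patch):

    import mxnet as mx
    from mxnet.gluon import nn

    # Share one dropout mask across the time axis (axis 1) of an NTC batch,
    # instead of sampling an independent mask for every element.
    layer = nn.Dropout(rate=0.5, axes=(1,))
    x = mx.nd.random.uniform(shape=(4, 10, 16))  # (batch, time, channels)
    with mx.autograd.train_mode():               # dropout only fires in training
        y = layer(x)                             # kept values scale by 1/(1 - rate)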

diff --git a/python/mxnet/gluon/contrib/rnn/rnn_cell.py b/python/mxnet/gluon/contrib/rnn/rnn_cell.py
index d6402b7..b964c71 100644
--- a/python/mxnet/gluon/contrib/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/contrib/rnn/rnn_cell.py
@@ -180,16 +180,12 @@ class VariationalDropoutCell(ModifierCell):
         states = _get_begin_state(self, F, begin_state, inputs, batch_size)
 
         if self.drop_inputs:
-            first_input = inputs.slice_axis(axis, 0, 1).split(1, axis=axis, squeeze_axis=True)
-            self._initialize_input_masks(F, first_input, states)
-            inputs = F.broadcast_mul(inputs, self.drop_inputs_mask.expand_dims(axis=axis))
+            inputs = F.Dropout(inputs, p=self.drop_inputs, axes=(axis,))
 
         outputs, states = self.base_cell.unroll(length, inputs, states, layout, merge_outputs=True,
                                                 valid_length=valid_length)
         if self.drop_outputs:
-            first_output = outputs.slice_axis(axis, 0, 1).split(1, axis=axis, squeeze_axis=True)
-            self._initialize_output_mask(F, first_output)
-            outputs = F.broadcast_mul(outputs, self.drop_outputs_mask.expand_dims(axis=axis))
+            outputs = F.Dropout(outputs, p=self.drop_outputs, axes=(axis,))
         merge_outputs = isinstance(outputs, tensor_types) if merge_outputs is None else \
             merge_outputs
         outputs, _, _, _ = _format_sequence(length, outputs, layout, merge_outputs)
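
The hunk above swaps the hand-rolled mask logic (slice_axis/split,
_initialize_*_masks, broadcast_mul) for a single F.Dropout call whose axes
argument pins one mask along the unrolled axis. A hedged usage sketch of the
cell after this change (the hidden size, rates, shapes, and the record()
scope are assumptions):

    import mxnet as mx
    from mxnet import gluon

    cell = gluon.contrib.rnn.VariationalDropoutCell(
        gluon.rnn.LSTMCell(20), drop_inputs=0.2, drop_outputs=0.2)
    cell.initialize()
    x = mx.nd.random.uniform(shape=(8, 5, 10))  # (batch, time, features)
    with mx.autograd.record():                  # training mode: masks are sampled
        outputs, _ = cell.unroll(5, x, merge_outputs=True)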
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index b61540d..9dc1a24 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -226,6 +226,8 @@ class Dropout(HybridBlock):
     ----------
     rate : float
         Fraction of the input units to drop. Must be a number between 0 and 1.
+    axes : tuple of int, default ()
+        The axes on which dropout mask is shared. If empty, regular dropout is applied.
 
 
     Inputs:
@@ -239,15 +241,16 @@ class Dropout(HybridBlock):
         `Dropout: A Simple Way to Prevent Neural Networks from Overfitting
         <http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf>`_
     """
-    def __init__(self, rate, **kwargs):
+    def __init__(self, rate, axes=(), **kwargs):
         super(Dropout, self).__init__(**kwargs)
         self._rate = rate
+        self._axes = axes
 
     def hybrid_forward(self, F, x):
-        return F.Dropout(x, p=self._rate, name='fwd')
+        return F.Dropout(x, p=self._rate, axes=self._axes, name='fwd')
 
     def __repr__(self):
-        s = '{name}(p = {_rate})'
+        s = '{name}(p = {_rate}, axes={_axes})'
         return s.format(name=self.__class__.__name__,
                         **self.__dict__)
 
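
A quick property check for the new parameter: every slice along a shared
axis sees the same mask, so the zero pattern repeats along that axis (a
sketch; the shape, rate, and train_mode scope are assumptions):

    import mxnet as mx
    from mxnet.gluon import nn

    x = mx.nd.ones((2, 3, 4))
    with mx.autograd.train_mode():
        y = nn.Dropout(rate=0.5, axes=(0,))(x)
    # Both slices along axis 0 share one mask, hence identical outputs.
    assert (y[0] == y[1]).asnumpy().all()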
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 61bf24e..f5c72f5 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -713,6 +713,8 @@ class DropoutCell(HybridRecurrentCell):
     rate : float
         Percentage of elements to drop out, which
         is 1 - percentage to retain.
+    axes : tuple of int, default ()
+        The axes on which dropout mask is shared. If empty, regular dropout is applied.
 
 
     Inputs:
@@ -723,13 +725,14 @@ class DropoutCell(HybridRecurrentCell):
         - **out**: output tensor with shape `(batch_size, size)`.
         - **next_states**: returns input `states` directly.
     """
-    def __init__(self, rate, prefix=None, params=None):
+    def __init__(self, rate, axes=(), prefix=None, params=None):
         super(DropoutCell, self).__init__(prefix, params)
         assert isinstance(rate, numeric_types), "rate must be a number"
-        self.rate = rate
+        self._rate = rate
+        self._axes = axes
 
     def __repr__(self):
-        s = '{name}(rate = {rate})'
+        s = '{name}(rate={_rate}, axes={_axes})'
         return s.format(name=self.__class__.__name__,
                         **self.__dict__)
 
@@ -740,8 +743,9 @@ class DropoutCell(HybridRecurrentCell):
         return 'dropout'
 
     def hybrid_forward(self, F, inputs, states):
-        if self.rate > 0:
-            inputs = F.Dropout(data=inputs, p=self.rate, name='t%d_fwd'%self._counter)
+        if self._rate > 0:
+            inputs = F.Dropout(data=inputs, p=self._rate, axes=self._axes,
+                               name='t%d_fwd'%self._counter)
         return inputs, states
 
     def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=None,
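
DropoutCell picks up the same argument. When unrolled over a merged NTC
tensor, axes=(1,) keeps a single mask per sequence across all time steps,
i.e. variational-style dropout (a sketch; the shapes and rate are
assumptions):

    import mxnet as mx
    from mxnet import gluon

    cell = gluon.rnn.DropoutCell(rate=0.5, axes=(1,))
    x = mx.nd.random.uniform(shape=(8, 5, 16))  # (batch, time, channels)
    with mx.autograd.train_mode():
        outputs, _ = cell.unroll(5, x, merge_outputs=True)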
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 89f5215..889d210 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -827,6 +827,46 @@ def test_activations():
     x = point_to_validate.reshape((1, 3, 2))
     assert_almost_equal(prelu(x).asnumpy(), mx.nd.where(x >= 0, x, 0.25 * x).asnumpy())
 
+@with_seed()
+def test_dropout():
+    def get_slice(x, axis, idx):
+        ix = ()
+        for i in range(x.ndim):
+            if i == axis:
+                ix += (idx,)
+            else:
+                ix += (slice(None, None, None),)
+        return x[ix]
+
+    def check_dropout_axes(ratio, shape, axes):
+        compactshape = list(shape)
+        for axis in axes:
+            compactshape[axis] = 1
+        compactx = mx.random.uniform(shape=tuple(compactshape))
+        broadcastx = compactx.broadcast_to(shape)
+        dropouty = mx.gluon.nn.Dropout(rate=ratio, axes=axes)(broadcastx)
+        for axis in axes:
+            target = get_slice(dropouty, axis, 0).asnumpy()
+            for i in range(1, shape[axis]):
+                assert(get_slice(dropouty, axis, i).asnumpy() == target).all()
+
+    nshape = (10, 10, 10, 10)
+    with mx.autograd.train_mode():
+        check_dropout_axes(0.25, nshape, axes = (0,))
+        check_dropout_axes(0.25, nshape, axes = (1,))
+        check_dropout_axes(0.25, nshape, axes = (2,))
+        check_dropout_axes(0.25, nshape, axes = (3,))
+        check_dropout_axes(0.25, nshape, axes = (0, 1))
+        check_dropout_axes(0.25, nshape, axes = (0, 2))
+        check_dropout_axes(0.25, nshape, axes = (0, 3))
+        check_dropout_axes(0.25, nshape, axes = (1, 2))
+        check_dropout_axes(0.25, nshape, axes = (1, 3))
+        check_dropout_axes(0.25, nshape, axes = (2, 3))
+        check_dropout_axes(0.25, nshape, axes = (0, 1, 2))
+        check_dropout_axes(0.25, nshape, axes = (0, 2, 3))
+        check_dropout_axes(0.25, nshape, axes = (1, 2, 3))
+
+
 
 
 if __name__ == '__main__':
diff --git a/tests/python/unittest/test_gluon_contrib.py b/tests/python/unittest/test_gluon_contrib.py
index 03e4261..29850dc 100644
--- a/tests/python/unittest/test_gluon_contrib.py
+++ b/tests/python/unittest/test_gluon_contrib.py
@@ -120,11 +120,8 @@ def test_vardrop():
         input_data = mx.nd.random_uniform(shape=(10, 3, 50), ctx=mx.context.current_context())
         with mx.autograd.record():
             outputs1, _ = cell.unroll(3, input_data, merge_outputs=True)
-            mask1 = cell.drop_outputs_mask.asnumpy()
             mx.nd.waitall()
             outputs2, _ = cell.unroll(3, input_data, merge_outputs=True)
-            mask2 = cell.drop_outputs_mask.asnumpy()
-        assert not almost_equal(mask1, mask2)
         assert not almost_equal(outputs1.asnumpy(), outputs2.asnumpy())
 
         inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 91b8faa..2208a33 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -268,7 +268,7 @@ def test_regression():
                      lambda x: x,
                      lambda x, y : x - y,
                      shape, stype='csr')
-   
+
 
 def check_softmax_grad(xpu):
     x = mx.sym.Variable('x')
@@ -4674,19 +4674,20 @@ def test_dropout():
     check_dropout_ratio(0.25, shape)
 
     nshape = (10, 10, 10, 10)
-    check_dropout_axes(0.25, nshape, axes = (0,))
-    check_dropout_axes(0.25, nshape, axes = (1,))
-    check_dropout_axes(0.25, nshape, axes = (2,))
-    check_dropout_axes(0.25, nshape, axes = (3,))
-    check_dropout_axes(0.25, nshape, axes = (0, 1))
-    check_dropout_axes(0.25, nshape, axes = (0, 2))
-    check_dropout_axes(0.25, nshape, axes = (0, 3))
-    check_dropout_axes(0.25, nshape, axes = (1, 2))
-    check_dropout_axes(0.25, nshape, axes = (1, 3))
-    check_dropout_axes(0.25, nshape, axes = (2, 3))
-    check_dropout_axes(0.25, nshape, axes = (0, 1, 2))
-    check_dropout_axes(0.25, nshape, axes = (0, 2, 3))
-    check_dropout_axes(0.25, nshape, axes = (1, 2, 3))
+    with mx.autograd.train_mode():
+        check_dropout_axes(0.25, nshape, axes = (0,))
+        check_dropout_axes(0.25, nshape, axes = (1,))
+        check_dropout_axes(0.25, nshape, axes = (2,))
+        check_dropout_axes(0.25, nshape, axes = (3,))
+        check_dropout_axes(0.25, nshape, axes = (0, 1))
+        check_dropout_axes(0.25, nshape, axes = (0, 2))
+        check_dropout_axes(0.25, nshape, axes = (0, 3))
+        check_dropout_axes(0.25, nshape, axes = (1, 2))
+        check_dropout_axes(0.25, nshape, axes = (1, 3))
+        check_dropout_axes(0.25, nshape, axes = (2, 3))
+        check_dropout_axes(0.25, nshape, axes = (0, 1, 2))
+        check_dropout_axes(0.25, nshape, axes = (0, 2, 3))
+        check_dropout_axes(0.25, nshape, axes = (1, 2, 3))
 
 
 @with_seed()

-- 
To stop receiving notification emails like this one, please contact
sxjscience@apache.org.