Posted to commits@mxnet.apache.org by ha...@apache.org on 2019/07/18 00:00:51 UTC

[incubator-mxnet] 15/42: [WIP][numpy] Fix for D2L Chapters 2/3/4 (#15139)

This is an automated email from the ASF dual-hosted git repository.

haoj pushed a commit to branch numpy
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 11cb34ae2961890de2e53f6454543b57f25a1ef3
Author: reminisce <wu...@gmail.com>
AuthorDate: Tue Jun 4 22:55:10 2019 -0700

    [WIP][numpy] Fix for D2L Chapters 2/3/4 (#15139)
    
    * Fix
    
    * Fix linear regression gluon
    
    * More fix
    
    * Fix pylint
    
    * Fix for chapter 4
    
    * Add np.add mul div mod pow sub and shuffle
    
    * Fix model selection, underfitting, overfitting
    
    * Fix weight decay
    
    * Fix dropout
    
    * Fix
    
    * Fix chapter 4
---
 python/mxnet/gluon/data/dataloader.py          |  20 +-
 python/mxnet/gluon/data/vision/transforms.py   |   6 +-
 python/mxnet/gluon/loss.py                     |  26 +-
 python/mxnet/gluon/nn/activations.py           |   5 +-
 python/mxnet/gluon/nn/basic_layers.py          |  13 +-
 python/mxnet/gluon/utils.py                    |  50 ++--
 python/mxnet/ndarray/numpy/_op.py              | 199 ++++++++++++++-
 python/mxnet/ndarray/register.py               |   8 +-
 python/mxnet/numpy/multiarray.py               | 326 ++++++++++++++++++-------
 python/mxnet/numpy_extension/__init__.py       |   5 +-
 python/mxnet/optimizer/optimizer.py            |  10 +-
 python/mxnet/symbol/numpy/_symbol.py           | 194 ++++++++-------
 python/mxnet/symbol/register.py                |   8 +-
 python/mxnet/symbol/symbol.py                  |   4 +
 python/mxnet/util.py                           |  38 ++-
 src/operator/nn/activation.cc                  |   1 +
 src/operator/nn/batch_norm.cc                  |   1 +
 src/operator/nn/convolution.cc                 |   1 +
 src/operator/nn/fully_connected.cc             |   1 +
 src/operator/nn/pooling.cc                     |   3 +-
 src/operator/random/shuffle_op.cc              |   1 +
 src/operator/tensor/elemwise_unary_op_basic.cc |   1 +
 src/operator/tensor/matrix_op.cc               |   1 +
 tests/python/unittest/test_numpy_gluon.py      |   6 +-
 24 files changed, 696 insertions(+), 232 deletions(-)
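
Taken together, these changes are what let the D2L chapter code run under
NumPy semantics. A minimal end-to-end sketch, assuming a build of this
numpy branch (data values are hypothetical):

    from mxnet import autograd, gluon, numpy as np, numpy_extension as npx

    npx.set_np()                      # enable np_shape and np_array semantics
    net = gluon.nn.Dense(1)
    net.initialize()
    X = np.array([[1., 2.], [3., 4.]])
    y = np.zeros((2,))
    loss_fn = gluon.loss.L2Loss()
    with autograd.record():
        loss = loss_fn(net(X), y)     # layers/losses bridge to legacy ops internally
    loss.backward()
    print(loss)                       # an mxnet.numpy.ndarray of per-sample losses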

diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py
index 65fd7d8..7e8110c 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -18,6 +18,7 @@
 # coding: utf-8
 # pylint: disable=ungrouped-imports
 """Dataset generator."""
+from __future__ import absolute_import
 __all__ = ['DataLoader']
 
 import pickle
@@ -37,6 +38,8 @@ except ImportError:
 
 from . import sampler as _sampler
 from ... import nd, context
+from ...util import is_np_array
+from ... import numpy as _mx_np  #pylint: disable=reimported
 
 if sys.platform == 'darwin' or sys.platform == 'win32':
     def rebuild_ndarray(*args):
@@ -127,13 +130,14 @@ class SimpleQueue(multiprocessing.queues.SimpleQueue):
 def default_batchify_fn(data):
     """Collate data into batch."""
     if isinstance(data[0], nd.NDArray):
-        return nd.stack(*data)
+        return _mx_np.stack(data) if is_np_array() else nd.stack(*data)
     elif isinstance(data[0], tuple):
         data = zip(*data)
         return [default_batchify_fn(i) for i in data]
     else:
         data = np.asarray(data)
-        return nd.array(data, dtype=data.dtype)
+        array_fn = _mx_np.array if is_np_array() else nd.array
+        return array_fn(data, dtype=data.dtype)
 
 
 def default_mp_batchify_fn(data):
@@ -141,20 +145,26 @@ def default_mp_batchify_fn(data):
     if isinstance(data[0], nd.NDArray):
         out = nd.empty((len(data),) + data[0].shape, dtype=data[0].dtype,
                        ctx=context.Context('cpu_shared', 0))
-        return nd.stack(*data, out=out)
+        if is_np_array():
+            out = out.as_np_ndarray()
+            return _mx_np.stack(data, out=out)
+        else:
+            return nd.stack(*data, out=out)
     elif isinstance(data[0], tuple):
         data = zip(*data)
         return [default_mp_batchify_fn(i) for i in data]
     else:
         data = np.asarray(data)
-        return nd.array(data, dtype=data.dtype,
+        array_fn = _mx_np.array if is_np_array() else nd.array
+        return array_fn(data, dtype=data.dtype,
                         ctx=context.Context('cpu_shared', 0))
 
 
 def _as_in_context(data, ctx):
     """Move data into new context."""
     if isinstance(data, nd.NDArray):
-        return data.as_in_context(ctx)
+        out = data.as_in_context(ctx)
+        return out.as_np_ndarray() if is_np_array() else out
     elif isinstance(data, (list, tuple)):
         return [_as_in_context(d, ctx) for d in data]
     return data
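
A sketch of the batchify dispatch above for plain Python inputs (values
hypothetical; assumes this branch):

    from mxnet import numpy_extension as npx
    from mxnet.gluon.data.dataloader import default_batchify_fn

    batch = [1.0, 2.0, 3.0]
    a = default_batchify_fn(batch)   # classic semantics -> mx.nd.NDArray
    npx.set_np()
    b = default_batchify_fn(batch)   # np semantics -> mxnet.numpy.ndarray
    print(type(a), type(b))
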
diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index 955f2b2..0e90c17 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -23,6 +23,7 @@ from ...block import Block, HybridBlock
 from ...nn import Sequential, HybridSequential
 from .... import image
 from ....base import numeric_types
+from ....util import is_np_array
 
 
 class Compose(Sequential):
@@ -134,7 +135,10 @@ class ToTensor(HybridBlock):
         super(ToTensor, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.image.to_tensor(x)
+        if is_np_array():
+            x = x.as_classic_ndarray()
+        out = F.image.to_tensor(x)
+        return out.as_np_ndarray() if is_np_array() else out
 
 
 class Normalize(HybridBlock):
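
ToTensor now round-trips through the legacy image op; a sketch, assuming a
uint8 HWC input as the legacy op expects:

    from mxnet import numpy as np, numpy_extension as npx
    from mxnet.gluon.data.vision import transforms

    npx.set_np()
    img = np.ones((4, 4, 3), dtype='uint8')   # HWC image, contents hypothetical
    out = transforms.ToTensor()(img)          # classic -> to_tensor -> np ndarray
    print(type(out), out.shape)               # mxnet.numpy.ndarray, (3, 4, 4)
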
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index e6d4c5b..8cf41a2 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -29,6 +29,7 @@ import numpy as np
 from .. import ndarray
 from ..base import numeric_types
 from .block import HybridBlock
+from .utils import _to_classic_arrays, _to_np_arrays
 
 
 def _apply_weighting(F, loss, weight=None, sample_weight=None):
@@ -135,10 +136,14 @@ class L2Loss(Loss):
         super(L2Loss, self).__init__(weight, batch_axis, **kwargs)
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
+        # TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
+        # We should rewrite this with np/npx ops.
+        pred, label, sample_weight = _to_classic_arrays(pred, label, sample_weight)
         label = _reshape_like(F, label, pred)
         loss = F.square(label - pred)
         loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
-        return F.mean(loss, axis=self._batch_axis, exclude=True)
+        out = F.mean(loss, axis=self._batch_axis, exclude=True)
+        return _to_np_arrays(out)
 
 
 class L1Loss(Loss):
@@ -174,10 +179,14 @@ class L1Loss(Loss):
         super(L1Loss, self).__init__(weight, batch_axis, **kwargs)
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
+        # TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
+        # We should rewrite this with np/npx ops.
+        pred, label, sample_weight = _to_classic_arrays(pred, label, sample_weight)
         label = _reshape_like(F, label, pred)
         loss = F.abs(label - pred)
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
-        return F.mean(loss, axis=self._batch_axis, exclude=True)
+        out = F.mean(loss, axis=self._batch_axis, exclude=True)
+        return _to_np_arrays(out)
 
 
 class SigmoidBinaryCrossEntropyLoss(Loss):
@@ -243,6 +252,10 @@ class SigmoidBinaryCrossEntropyLoss(Loss):
         self._from_sigmoid = from_sigmoid
 
     def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None):
+        # TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
+        # We should rewrite this with np/npx ops.
+        pred, label, sample_weight, pos_weight =\
+            _to_classic_arrays(pred, label, sample_weight, pos_weight)
         label = _reshape_like(F, label, pred)
         if not self._from_sigmoid:
             if pos_weight is None:
@@ -264,7 +277,8 @@ class SigmoidBinaryCrossEntropyLoss(Loss):
                 loss = -(F.broadcast_mul(F.log(pred + eps) * label, pos_weight)
                          + F.log(1. - pred + eps) * (1. - label))
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
-        return F.mean(loss, axis=self._batch_axis, exclude=True)
+        out = F.mean(loss, axis=self._batch_axis, exclude=True)
+        return _to_np_arrays(out)
 
 
 SigmoidBCELoss = SigmoidBinaryCrossEntropyLoss
@@ -341,6 +355,9 @@ class SoftmaxCrossEntropyLoss(Loss):
         self._from_logits = from_logits
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
+        # TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
+        # We should rewrite this with np/npx ops.
+        pred, label = _to_classic_arrays(pred, label)
         if not self._from_logits:
             pred = F.log_softmax(pred, self._axis)
         if self._sparse_label:
@@ -349,7 +366,8 @@ class SoftmaxCrossEntropyLoss(Loss):
             label = _reshape_like(F, label, pred)
             loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
-        return F.mean(loss, axis=self._batch_axis, exclude=True)
+        out = F.mean(loss, axis=self._batch_axis, exclude=True)
+        return _to_np_arrays(out)
 
 
 SoftmaxCELoss = SoftmaxCrossEntropyLoss
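
Every loss touched here follows the same convert-compute-convert pattern; a
sketch with SoftmaxCrossEntropyLoss (logits and labels hypothetical):

    from mxnet import numpy as np, numpy_extension as npx
    from mxnet.gluon import loss as gloss

    npx.set_np()
    ce = gloss.SoftmaxCrossEntropyLoss()
    pred = np.array([[0.1, 0.9], [0.8, 0.2]])   # unnormalized scores
    label = np.array([1, 0])                    # sparse class indices
    print(ce(pred, label))   # computed with legacy ops, returned as np ndarray
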
diff --git a/python/mxnet/gluon/nn/activations.py b/python/mxnet/gluon/nn/activations.py
index 8c51b0a..04a8227 100644
--- a/python/mxnet/gluon/nn/activations.py
+++ b/python/mxnet/gluon/nn/activations.py
@@ -22,6 +22,7 @@ __all__ = ['Activation', 'LeakyReLU', 'PReLU', 'ELU', 'SELU', 'Swish', 'GELU']
 
 from ... import initializer
 from ..block import HybridBlock
+from ..utils import _to_classic_arrays, _to_np_arrays
 
 
 class Activation(HybridBlock):
@@ -48,7 +49,9 @@ class Activation(HybridBlock):
         return self._act_type
 
     def hybrid_forward(self, F, x):
-        return F.Activation(x, act_type=self._act_type, name='fwd')
+        x = _to_classic_arrays(x)
+        out = F.Activation(x, act_type=self._act_type, name='fwd')
+        return _to_np_arrays(out)
 
     def __repr__(self):
         s = '{name}({_act_type})'
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index 3d6976c..654e3ef 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -25,7 +25,7 @@ import numpy as np
 
 from .activations import Activation
 from ..block import Block, HybridBlock
-from ..utils import _indent
+from ..utils import _indent, _to_classic_arrays, _to_np_arrays
 from ... import nd, sym
 
 
@@ -217,11 +217,14 @@ class Dense(HybridBlock):
                 self.act = None
 
     def hybrid_forward(self, F, x, weight, bias=None):
+        # TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
+        # We should rewrite this with np/npx ops.
+        x, weight, bias = _to_classic_arrays(x, weight, bias)
         act = F.FullyConnected(x, weight, bias, no_bias=bias is None, num_hidden=self._units,
                                flatten=self._flatten, name='fwd')
         if self.act is not None:
             act = self.act(act)
-        return act
+        return _to_np_arrays(act)
 
     def __repr__(self):
         s = '{name}({layout}, {act})'
@@ -262,10 +265,12 @@ class Dropout(HybridBlock):
         self._axes = axes
 
     def hybrid_forward(self, F, x):
+        x = _to_classic_arrays(x)
         if self._rate > 0:
-            return F.Dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False)
+            out = F.Dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False)
         else:
-            return F.identity(x)
+            out = F.identity(x)
+        return _to_np_arrays(out)
 
     def __repr__(self):
         s = '{name}(p = {_rate}, axes={_axes})'
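
Dropout takes the same route; outside a training scope it reduces to identity
but still hands back an np ndarray under np semantics. A sketch:

    from mxnet import numpy as np, numpy_extension as npx
    from mxnet.gluon import nn

    npx.set_np()
    drop = nn.Dropout(0.5)
    x = np.ones((2, 4))
    print(drop(x))   # identity outside autograd.record(); type mxnet.numpy.ndarray
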
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 4ef4905..19f5c1a 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -38,7 +38,7 @@ except ImportError:
 import numpy as np
 
 from .. import ndarray
-from ..util import is_np_shape
+from ..util import is_np_shape, is_np_array
 
 
 def split_data(data, num_slice, batch_axis=0, even_split=True):
@@ -112,12 +112,18 @@ def split_and_load(data, ctx_list, batch_axis=0, even_split=True):
     list of NDArray
         Each corresponds to a context in `ctx_list`.
     """
+    # TODO(junwu): temp solution for supporting np.ndarray
+    # rewrite this using np ops
     if not isinstance(data, ndarray.NDArray):
         data = ndarray.array(data, ctx=ctx_list[0])
     if len(ctx_list) == 1:
+        if is_np_array():
+            data = data.as_np_ndarray()
         return [data.as_in_context(ctx_list[0])]
 
     slices = split_data(data, len(ctx_list), batch_axis, even_split)
+    if is_np_array():
+        slices = [i.as_np_ndarray() for i in slices]
     return [i.as_in_context(ctx) for i, ctx in zip(slices, ctx_list)]
 
 
@@ -415,6 +421,7 @@ class HookHandle(object):
     def __exit__(self, ptype, value, trace):
         self.detach()
 
+
 def shape_is_known(shape):
     """Check whether a shape is completely known with or without np semantics.
 
@@ -432,6 +439,7 @@ def shape_is_known(shape):
                                             "received {}".format(unknown_dim_size, dim_size)
     return True
 
+
 def _check_same_symbol_type(symbols):
     """Check whether all the symbols in the list are of the same type.
     Raise type error if the types are different. Return the class of
@@ -458,23 +466,33 @@ def _check_same_symbol_type(symbols):
 def _check_all_np_ndarrays(out):
     """Check if ndarrays in out are all np.ndarray"""
     from ..numpy import ndarray as np_ndarray
+    from ..symbol.numpy import _Symbol as np_symbol
     assert isinstance(out, (list, tuple))
     for array in out:
-        if not isinstance(array, np_ndarray):
-            raise TypeError('Expected np.ndarray type in output, while received type '
+        if not isinstance(array, (np_ndarray, np_symbol)):
+            raise TypeError('Expected np.ndarray or np._Symbol type in output, while received type '
                             '{}'.format(str(type(array))))
 
 
-def shape_is_known(shape):
-    """Check whether a shape is completely known w/ or w/o np semantics."""
-    if shape is None:
-        return False
-    unknown_dim_size = -1 if is_np_shape() else 0
-    if len(shape) == 0:
-        return unknown_dim_size == -1
-    for dim_size in shape:
-        if dim_size == unknown_dim_size:
-            return False
-        assert dim_size > unknown_dim_size, "shape dimension size cannot be less than {}, while " \
-                                            "received {}".format(unknown_dim_size, dim_size)
-    return True
+def _to_classic_arrays(*args):
+    """Convert arrays to classic arrays. This is used in a Gluon layer for converting
+    inputs of np arrays to classic arrays so that the layer built with legacy ops can still
+    be used in np_array semantics."""
+    num_inputs = len(args)
+    assert num_inputs != 0
+    if not is_np_array():
+        return args[0] if num_inputs == 1 else args
+    in_arrs = [arr if arr is None else arr.as_classic_ndarray() for arr in args]
+    return in_arrs[0] if num_inputs == 1 else in_arrs
+
+
+def _to_np_arrays(*args):
+    """Convert arrays to np arrays. This is used in a Gluon layer for converting
+    outputs of classic arrays to np arrays so that the layer built with legacy ops can still
+    be used in np_array semantics."""
+    num_outputs = len(args)
+    assert num_outputs != 0
+    if not is_np_array():
+        return args[0] if num_outputs == 1 else args
+    out = [arr.as_np_ndarray() for arr in args]
+    return out[0] if num_outputs == 1 else out
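
The helper contract in short: no-ops under classic semantics; element-wise
conversion under np semantics, with None placeholders passed through on the
way in. A sketch:

    from mxnet import numpy as np, numpy_extension as npx
    from mxnet.gluon.utils import _to_classic_arrays, _to_np_arrays

    npx.set_np()
    x = np.array([1., 2.])
    cx = _to_classic_arrays(x)             # one input -> one classic NDArray
    a, b = _to_classic_arrays(x, None)     # None survives conversion
    y = _to_np_arrays(cx)                  # back to mxnet.numpy.ndarray
    print(type(cx), b, type(y))
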
diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index 6c83e1f..f3f4d74 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -24,7 +24,9 @@ from ...util import _sanity_check_params, set_module
 from ...context import current_context
 from . import _internal as _npi
 
-__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'concatenate', 'arange', 'argmax']
+__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'arange', 'argmax',
+           'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'concatenate',
+           'clip']
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -51,7 +53,7 @@ def zeros(shape, dtype=_np.float32, **kwargs):
         Array of zeros with the given shape, dtype, and ctx.
     """
     _sanity_check_params('zeros', ['order'], kwargs)
-    ctx = kwargs.get('ctx', current_context())
+    ctx = kwargs.pop('ctx', current_context())
     if ctx is None:
         ctx = current_context()
     dtype = _np.float32 if dtype is None else dtype
@@ -82,7 +84,7 @@ def ones(shape, dtype=None, **kwargs):
         Array of ones with the given shape, dtype, and ctx.
     """
     _sanity_check_params('ones', ['order'], kwargs)
-    ctx = kwargs.get('ctx', current_context())
+    ctx = kwargs.pop('ctx', current_context())
     if ctx is None:
         ctx = current_context()
     dtype = _np.float32 if dtype is None else dtype
@@ -302,3 +304,194 @@ def concatenate(seq, axis=0, out=None):
         The concatenated array.
     """
     return _npi.concatenate(*seq, dim=axis, out=out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def add(x1, x2, out=None):
+    """Add arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be added. If x1.shape != x2.shape, they must be broadcastable to
+        a common shape (which may be the shape of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    add : ndarray or scalar
+        The sum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.add, _np.add, _npi.add_scalar, None, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def subtract(x1, x2, out=None):
+    """Subtract arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be subtracted from each other. If x1.shape != x2.shape,
+        they must be broadcastable to a common shape (which may be the shape
+        of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    subtract : ndarray or scalar
+        The difference of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.subtract, _np.subtract, _npi.subtract_scalar,
+                         _npi.rsubtract_scalar, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def multiply(x1, x2, out=None):
+    """Multiply arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be multiplied. If x1.shape != x2.shape, they must be broadcastable to
+        a common shape (which may be the shape of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        The product of x1 and x2, element-wise. This is a scalar if both x1 and x2
+        are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.multiply, _np.multiply, _npi.multiply_scalar, None, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def divide(x1, x2, out=None):
+    """Returns a true division of the inputs, element-wise.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+
+    x2 : ndarray or scalar
+        Divisor array.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.true_divide, _np.divide, _npi.true_divide_scalar,
+                         _npi.rtrue_divide_scalar, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def mod(x1, x2, out=None):
+    """Return element-wise remainder of division.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+
+    x2 : ndarray or scalar
+        Divisor array.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.mod, _np.mod, _npi.mod_scalar, _npi.rmod_scalar, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def power(x1, x2, out=None):
+    """First array elements raised to powers from second array, element-wise.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        The bases.
+
+    x2 : ndarray or scalar
+        The exponent.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        The bases in x1 raised to the exponents in x2.
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _ufunc_helper(x1, x2, _npi.power, _np.power, _npi.power_scalar, _npi.rpower_scalar, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def clip(a, a_min, a_max, out=None):
+    """Clip (limit) the values in an array.
+
+    Given an interval, values outside the interval are clipped to
+    the interval edges.  For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Parameters
+    ----------
+    a : ndarray
+        Array containing elements to clip.
+    a_min : scalar or `None`
+        Minimum value. If `None`, clipping is not performed on lower
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    a_max : scalar or `None`
+        Maximum value. If `None`, clipping is not performed on upper
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    out : ndarray, optional
+        The results will be placed in this array. It may be the input
+        array for in-place clipping.  `out` must be of the right shape
+        to hold the output.
+
+    Returns
+    -------
+    clipped_array : ndarray
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+    """
+    if a_min is None and a_max is None:
+        raise ValueError('array_clip: must set either max or min')
+    if a_min is None:
+        a_min = float('-inf')
+    if a_max is None:
+        a_max = float('inf')
+    return _npi.clip(a, a_min, a_max, out=out)
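
The ufunc-style wrappers all dispatch the same way: ndarray/ndarray pairs go
to an _npi op, ndarray/scalar to the *_scalar variant, and scalar/ndarray to
the reflected r*_scalar variant. A sketch:

    from mxnet import numpy as np, numpy_extension as npx

    npx.set_np()
    a = np.array([1., 2., 3.])
    print(np.add(a, a))           # ndarray + ndarray -> _npi.add
    print(np.subtract(a, 1.0))    # ndarray - scalar  -> _npi.subtract_scalar
    print(np.power(2.0, a))       # scalar ** ndarray -> _npi.rpower_scalar
    print(np.clip(a, None, 2.0))  # an open lower bound becomes float('-inf')
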
diff --git a/python/mxnet/ndarray/register.py b/python/mxnet/ndarray/register.py
index c2225bb..cde1145 100644
--- a/python/mxnet/ndarray/register.py
+++ b/python/mxnet/ndarray/register.py
@@ -221,7 +221,13 @@ def %s(%s):"""%(func_name, ', '.join(signature)))
         vals.append(%s)"""%(name, name, name))
             # dtype
             if dtype_name is not None:
-                code.append("""
+                if is_np_op:
+                    code.append("""
+    if %s is not _Null and %s is not None:
+        keys.append('%s')
+        vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name, dtype_name))
+                else:
+                    code.append("""
     if %s is not _Null:
         keys.append('%s')
         vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 6b3dcde..2f0cdbc 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -37,8 +37,9 @@ from ..context import current_context
 from ..ndarray import numpy as _mx_nd_np
 from ..ndarray.numpy import _internal as _npi
 
-__all__ = ['ndarray', 'empty', 'array', 'zeros', 'ones', 'maximum', 'minimum', 'stack',
-           'concatenate', 'arange', 'argmax']
+__all__ = ['ndarray', 'empty', 'array', 'zeros', 'ones', 'maximum', 'minimum', 'stack', 'arange',
+           'argmax', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'concatenate',
+           'clip']
 
 
 # This function is copied from ndarray.py since pylint
@@ -152,67 +153,40 @@ class ndarray(NDArray):
 
     def __add__(self, other):
         """x.__add__(y) <=> x + y"""
-        if isinstance(other, ndarray):
-            return _npi.add(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.add_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return add(self, other)
 
     def __iadd__(self, other):
         """x.__iadd__(y) <=> x += y"""
         if not self.writable:
             raise ValueError('trying to add to a readonly ndarray')
-        if isinstance(other, ndarray):
-            return _npi.add(self, other, out=self)
-        elif isinstance(other, numeric_types):
-            return _npi.add_scalar(self, float(other), out=self)
-        else:
-            raise TypeError('type {} is not supported'.format(str(type(other))))
+        return add(self, other, out=self)
 
     def __sub__(self, other):
         """x.__sub__(y) <=> x - y"""
-        if isinstance(other, ndarray):
-            return _npi.subtract(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.subtract_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return subtract(self, other)
 
     def __isub__(self, other):
         """x.__isub__(y) <=> x -= y"""
         if not self.writable:
             raise ValueError('trying to subtract from a readonly ndarray')
-        if isinstance(other, ndarray):
-            return _npi.subtract(self, other, out=self)
-        elif isinstance(other, numeric_types):
-            return _npi.subtract_scalar(self, float(other), out=self)
-        else:
-            raise TypeError('type {} is not supported'.format(str(type(other))))
+        return subtract(self, other, out=self)
 
     def __rsub__(self, other):
         """x.__rsub__(y) <=> y - x"""
-        if isinstance(other, ndarray):
-            return _npi.subtract(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rsubtract_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return subtract(other, self)
 
     def __mul__(self, other):
         """x.__mul__(y) <=> x * y"""
-        if isinstance(other, ndarray):
-            return _npi.multiply(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.multiply_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return multiply(self, other)
 
     def __neg__(self):
         return self.__mul__(-1.0)
 
     def __imul__(self, other):
-        raise NotImplementedError
+        """x.__imul__(y) <=> x *= y"""
+        if not self.writable:
+            raise ValueError('trying to multiply a readonly ndarray')
+        return multiply(self, other, out=self)
 
     def __rmul__(self, other):
         """x.__rmul__(y) <=> y * x"""
@@ -233,67 +207,42 @@ class ndarray(NDArray):
                              ' been encountered.')
 
     def __idiv__(self, other):
-        raise NotImplementedError
+        raise AttributeError('ndarray.__idiv__ is replaced by __itruediv__. If you are using'
+                             ' Python2, please use the statement from __future__ import division'
+                             ' to change the / operator to mean true division throughout the'
+                             ' module. If you are using Python3, this error should not have'
+                             ' been encountered.')
 
     def __truediv__(self, other):
         """x.__truediv__(y) <=> x / y"""
-        if isinstance(other, ndarray):
-            return _npi.true_divide(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.true_divide_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as divisor".format(str(type(other))))
+        return divide(self, other)
 
     def __rtruediv__(self, other):
         """x.__rtruediv__(y) <=> y / x"""
-        if isinstance(other, ndarray):
-            return _npi.true_divide(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rtrue_divide_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as dividend".format(str(type(other))))
+        return divide(other, self)
 
     def __itruediv__(self, other):
-        raise NotImplementedError
+        return divide(self, other, out=self)
 
     def __mod__(self, other):
         """x.__mod__(y) <=> x % y"""
-        if isinstance(other, ndarray):
-            return _npi.mod(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.mod_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return mod(self, other)
 
     def __rmod__(self, other):
         """x.__rmod__(y) <=> y % x"""
-        if isinstance(other, ndarray):
-            return _npi.mod(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rmod_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return mod(other, self)
 
     def __imod__(self, other):
-        raise NotImplementedError
+        """x.__imod__(y) <=> x %= y"""
+        return mod(self, other, out=self)
 
     def __pow__(self, other):
         """x.__pow__(y) <=> x ** y"""
-        if isinstance(other, ndarray):
-            return _npi.power(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.power_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return power(self, other)
 
     def __rpow__(self, other):
         """x.__rpow__(y) <=> y ** x"""
-        if isinstance(other, ndarray):
-            return _npi.power(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rpower_scalar(self, float(other))
-        else:
-            raise TypeError("ndarray does not support type {} as operand".format(str(type(other))))
+        return power(other, self)
 
     def __eq__(self, other):
         """x.__eq__(y) <=> x == y"""
@@ -370,6 +319,18 @@ class ndarray(NDArray):
         else:
             raise ValueError("The truth value of an ndarray with multiple elements is ambiguous.")
 
+    def __float__(self):
+        num_elements = self.size
+        if num_elements != 1:
+            raise TypeError('only size-1 arrays can be converted to Python scalars')
+        return float(self.item())
+
+    def __int__(self):
+        num_elements = self.size
+        if num_elements != 1:
+            raise TypeError('only size-1 arrays can be converted to Python scalars')
+        return int(self.item())
+
     def __len__(self):
         """Number of elements along the first axis."""
         return self.shape[0]
@@ -557,7 +518,10 @@ class ndarray(NDArray):
         return self._as_classic_ndarray().copyto(other).as_np_ndarray()
 
     def asscalar(self):
-        raise AttributeError('mxnet.numpy.ndarray object has no attribute as_scalar')
+        raise AttributeError('mxnet.numpy.ndarray object has no attribute asscalar')
+
+    def argmax(self, axis=None, out=None):  # pylint: disable=arguments-differ
+        return _mx_nd_np.argmax(self, axis, out)
 
     def as_in_context(self, context):
         return super(ndarray, self).as_in_context(context).as_np_ndarray()
@@ -722,14 +686,6 @@ class ndarray(NDArray):
         """
         raise NotImplementedError
 
-    def argmax(self, *args, **kwargs):
-        """Convenience fluent method for :py:func:`argmax`.
-
-        The arguments are the same as for :py:func:`argmax`, with
-        this array as data.
-        """
-        raise NotImplementedError
-
     def argmax_channel(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmax_channel`.
 
@@ -746,13 +702,11 @@ class ndarray(NDArray):
         """
         raise NotImplementedError
 
-    def clip(self, *args, **kwargs):
-        """Convenience fluent method for :py:func:`clip`.
-
-        The arguments are the same as for :py:func:`clip`, with
-        this array as data.
+    def clip(self, min=None, max=None, out=None):  # pylint: disable=arguments-differ
+        """Return an array whose values are limited to [min, max].
+        One of max or min must be given.
         """
-        raise NotImplementedError
+        return clip(self, min, max, out=out)
 
     def abs(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`abs`.
@@ -882,13 +836,13 @@ class ndarray(NDArray):
         """
         raise AttributeError('mxnet.numpy.ndarray object has no attribute nanprod')
 
-    def mean(self, *args, **kwargs):
+    def mean(self, axis=None, dtype=None, out=None, keepdims=False):  # pylint: disable=arguments-differ
         """Convenience fluent method for :py:func:`mean`.
 
         The arguments are the same as for :py:func:`mean`, with
         this array as data.
         """
-        raise NotImplementedError
+        return _mx_nd_np.mean(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out)
 
     def max(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`max`.
@@ -1511,3 +1465,185 @@ def concatenate(seq, axis=0, out=None):
         The concatenated array.
     """
     return _mx_nd_np.concatenate(seq, axis=axis, out=out)
+
+
+@set_module('mxnet.numpy')
+def add(x1, x2, out=None):
+    """Add arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be added. If x1.shape != x2.shape, they must be broadcastable to
+        a common shape (which may be the shape of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    add : ndarray or scalar
+        The sum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.add(x1, x2, out)
+
+
+@set_module('mxnet.numpy')
+def subtract(x1, x2, out=None):
+    """Subtract arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be subtracted from each other. If x1.shape != x2.shape,
+        they must be broadcastable to a common shape (which may be the shape
+        of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    subtract : ndarray or scalar
+        The difference of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.subtract(x1, x2, out)
+
+
+@set_module('mxnet.numpy')
+def multiply(x1, x2, out=None):
+    """Multiply arguments element-wise.
+
+    Parameters
+    ----------
+    x1, x2 : ndarrays or scalar values
+        The arrays to be multiplied. If x1.shape != x2.shape, they must be broadcastable to
+        a common shape (which may be the shape of one or the other).
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        The product of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.multiply(x1, x2, out)
+
+
+@set_module('mxnet.numpy')
+def divide(x1, x2, out=None):
+    """Returns a true division of the inputs, element-wise.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+
+    x2 : ndarray or scalar
+        Divisor array.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.divide(x1, x2, out=out)
+
+
+@set_module('mxnet.numpy')
+def mod(x1, x2, out=None):
+    """Return element-wise remainder of division.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+
+    x2 : ndarray or scalar
+        Divisor array.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.mod(x1, x2, out=out)
+
+
+@set_module('mxnet.numpy')
+def power(x1, x2, out=None):
+    """First array elements raised to powers from second array, element-wise.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        The bases.
+
+    x2 : ndarray or scalar
+        The exponent.
+
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        The bases in x1 raised to the exponents in x2.
+        This is a scalar if both x1 and x2 are scalars.
+    """
+    return _mx_nd_np.power(x1, x2, out=out)
+
+
+@set_module('mxnet.numpy')
+def clip(a, a_min, a_max, out=None):
+    """Clip (limit) the values in an array.
+
+    Given an interval, values outside the interval are clipped to
+    the interval edges.  For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Parameters
+    ----------
+    a : ndarray
+        Array containing elements to clip.
+    a_min : scalar or `None`
+        Minimum value. If `None`, clipping is not performed on lower
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    a_max : scalar or `None`
+        Maximum value. If `None`, clipping is not performed on upper
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    out : ndarray, optional
+        The results will be placed in this array. It may be the input
+        array for in-place clipping.  `out` must be of the right shape
+        to hold the output.
+
+    Returns
+    -------
+    clipped_array : ndarray
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+    """
+    return _mx_nd_np.clip(a, a_min, a_max, out=out)
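
With the dunders delegating to these module functions, plain operator syntax
(including the in-place forms enabled above) now works on np ndarrays. A
sketch:

    from mxnet import numpy as np, numpy_extension as npx

    npx.set_np()
    a = np.array([2., 4., 6.])
    a += 1                   # __iadd__     -> add(self, other, out=self)
    a /= 2                   # __itruediv__ -> divide(self, other, out=self)
    print(a, float(a[0]))    # __float__ is defined for size-1 arrays
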
diff --git a/python/mxnet/numpy_extension/__init__.py b/python/mxnet/numpy_extension/__init__.py
index 0c89a88..6419c57 100644
--- a/python/mxnet/numpy_extension/__init__.py
+++ b/python/mxnet/numpy_extension/__init__.py
@@ -24,8 +24,9 @@ from . import _op
 from . import _register
 from ._op import *  # pylint: disable=wildcard-import
 from ..context import *  # pylint: disable=wildcard-import
-from ..util import use_np_shape, np_shape, is_np_shape
-from ..util import use_np_array, np_array, is_np_array, use_np
+from ..util import use_np_shape, np_shape, is_np_shape, set_np_shape
+from ..util import use_np_array, np_array, is_np_array, set_np_array
+from ..util import set_np, use_np
 from .. import autograd
 
 __all__ = []
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 5b433ee..5ab256c 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -34,6 +34,7 @@ from ..ndarray import (sgd_update, sgd_mom_update, adam_update, rmsprop_update,
                        multi_mp_sgd_mom_update)
 from ..ndarray import sparse
 from ..random import normal
+from ..util import is_np_array
 
 __all__ = [
     'AdaDelta', 'AdaGrad', 'Adam', 'Adamax', 'DCASGD', 'FTML', 'Ftrl', 'LBSGD',
@@ -95,7 +96,7 @@ class Optimizer(object):
     def __init__(self, rescale_grad=1., param_idx2name=None, wd=0.,
                  clip_gradient=None, learning_rate=0.01,
                  lr_scheduler=None, sym=None, begin_num_update=0,
-                 multi_precision=False, param_dict=None, allow_np=False):
+                 multi_precision=False, param_dict=None):
         self.rescale_grad = rescale_grad
         self.lr = learning_rate
         self.lr_scheduler = lr_scheduler
@@ -120,7 +121,7 @@ class Optimizer(object):
         self.idx2name = param_idx2name.copy()
         self.sym_info = (sym.attr_dict(), sym.list_arguments()) if sym is not None else ()
         self.param_dict = param_dict if param_dict else {}
-        self.allow_np = allow_np
+        self.allow_np_array = is_np_array()
 
         self.set_lr_mult({})
         self.set_wd_mult({})
@@ -1648,6 +1649,9 @@ create = Optimizer.create_optimizer  # pylint: disable=invalid-name
 
 
 def _as_classic(a, allow_np):
+    # TODO(junwu): This is a temp solution for allowing converting
+    # np.ndarray to mx.nd.NDArray to be fed into the optimizer since
+    # users may have custom optimizers implemented using mx.nd.NDArray ops.
     from ..numpy import ndarray as np_ndarray
     if isinstance(a, (tuple, list)):
         if any(isinstance(x, np_ndarray) for x in a):
@@ -1675,7 +1679,7 @@ class Updater(object):
 
     def __call__(self, index, grad, weight):
         """Updates weight given gradient and index."""
-        allow_np = self.optimizer.allow_np
+        allow_np = self.optimizer.allow_np_array
         if not isinstance(index, (list, tuple)):
             indices = [index]
             grads = [_as_classic(grad, allow_np)]
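
The updater now decides from the semantics captured at optimizer construction
whether np inputs must be converted back to classic NDArrays before the update
kernels run. A sketch (weight and gradient hypothetical):

    from mxnet import numpy as np, numpy_extension as npx
    from mxnet import optimizer as opt

    npx.set_np()
    sgd = opt.SGD(learning_rate=0.1)   # records allow_np_array = is_np_array()
    updater = opt.get_updater(sgd)
    w, g = np.ones((2,)), np.ones((2,))
    updater(0, g, w)                   # np inputs pass through _as_classic
    print(w)                           # updated in place, expected [0.9, 0.9]
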
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index 7a55547..72f9eca 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -29,7 +29,8 @@ from ..symbol import Symbol
 from .._internal import _set_np_symbol_class
 from . import _internal as _npi
 
-__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'concatenate', 'arange', 'argmax']
+__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'concatenate', 'arange', 'argmax',
+           'clip', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power']
 
 
 @set_module('mxnet.symbol.numpy')
@@ -45,53 +46,23 @@ class _Symbol(Symbol):
 
     def __add__(self, other):
         """x.__add__(y) <=> x + y"""
-        if isinstance(other, _Symbol):
-            return _npi.add(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.add_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand"
-                            .format(str(type(other))))
+        return add(self, other)
 
     def __sub__(self, other):
         """x.__sub__(y) <=> x - y"""
-        if isinstance(other, _Symbol):
-            return _npi.subtract(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.subtract_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand"
-                            .format(str(type(other))))
+        return subtract(self, other)
 
     def __rsub__(self, other):
         """x.__rsub__(y) <=> y - x"""
-        if isinstance(other, _Symbol):
-            return _npi.subtract(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rsubtract_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand"
-                            .format(str(type(other))))
+        return subtract(other, self)
 
     def __mul__(self, other):
         """x.__mul__(y) <=> x * y"""
-        if isinstance(other, _Symbol):
-            return _npi.multiply(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.multiply_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand"
-                            .format(str(type(other))))
+        return multiply(self, other)
 
     def __rmul__(self, other):
         """x.__rmul__(y) <=> y * x"""
-        if isinstance(other, _Symbol):
-            return _npi.multiply(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.multiply_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand"
-                            .format(str(type(other))))
+        return multiply(other, self)
 
     def __div__(self, other):
         raise AttributeError('_Symbol.__div__ is replaced by __truediv__. If you are using'
@@ -109,63 +80,32 @@ class _Symbol(Symbol):
 
     def __mod__(self, other):
         """x.__mod__(y) <=> x % y"""
-        if isinstance(other, _Symbol):
-            return _npi.mod(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.mod_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand".format(str(type(other))))
+        return mod(self, other)
 
     def __rmod__(self, other):
         """x.__rmod__(y) <=> y % x"""
-        if isinstance(other, _Symbol):
-            return _npi.mod(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rmod_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand".format(str(type(other))))
+        return mod(other, self)
 
     def __idiv__(self, other):
         raise NotImplementedError
 
     def __truediv__(self, other):
         """x.__truediv__(y) <=> x / y"""
-        if isinstance(other, _Symbol):
-            return _npi.true_divide(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.true_divide_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as divisor".format(str(type(other))))
+        return divide(self, other)
 
     def __rtruediv__(self, other):
         """x.__rtruediv__(y) <=> y / x"""
-        if isinstance(other, _Symbol):
-            return _npi.true_divide(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rtrue_divide_scalar(self, float(other)).as_np_ndarray()
-        else:
-            raise TypeError("_Symbol does not support type {} as dividend".format(str(type(other))))
+        return divide(other, self)
 
     def __itruediv__(self, other):
         raise NotImplementedError
 
     def __pow__(self, other):
         """x.__pow__(y) <=> x ** y"""
-        if isinstance(other, _Symbol):
-            return _npi.power(self, other)
-        elif isinstance(other, numeric_types):
-            return _npi.power_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand".format(str(type(other))))
+        return power(self, other)
 
     def __rpow__(self, other):
-        """x.__rpow__(y) <=> y ** x"""
-        if isinstance(other, _Symbol):
-            return _npi.power(other, self)
-        elif isinstance(other, numeric_types):
-            return _npi.rpower_scalar(self, float(other))
-        else:
-            raise TypeError("_Symbol does not support type {} as operand".format(str(type(other))))
+        return power(other, self)
 
     def __neg__(self):
         """x.__neg__() <=> - x"""
@@ -243,6 +183,10 @@ class _Symbol(Symbol):
         check_call(_LIB.MXShallowCopySymbol(self.handle, ctypes.byref(hdl)))
         return Symbol(handle=hdl)
 
+    def as_np_ndarray(self):
+        """For the convenience of conversion between legacy and np symbols."""
+        return self
+
     @property
     # pylint: disable= invalid-name, undefined-variable
     def T(self):
@@ -262,6 +206,9 @@ class _Symbol(Symbol):
                                       .format(str(order)))
         return _mx_np_op.reshape(self, newshape=shape, order=order)
 
+    def argmax(self, axis=None, out=None):  # pylint: disable=arguments-differ
+        return _mx_np_op.argmax(self, axis, out)
+
     def reshape_like(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`reshape_like`.
 
@@ -406,14 +353,6 @@ class _Symbol(Symbol):
         """
         raise NotImplementedError
 
-    def argmax(self, *args, **kwargs):
-        """Convenience fluent method for :py:func:`argmax`.
-
-        The arguments are the same as for :py:func:`argmax`, with
-        this array as data.
-        """
-        raise NotImplementedError
-
     def argmax_channel(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmax_channel`.
 
@@ -430,13 +369,11 @@ class _Symbol(Symbol):
         """
         raise NotImplementedError
 
-    def clip(self, *args, **kwargs):
-        """Convenience fluent method for :py:func:`clip`.
-
-        The arguments are the same as for :py:func:`clip`, with
-        this array as data.
+    def clip(self, min=None, max=None, out=None):  # pylint: disable=arguments-differ
+        """Return an array whose values are limited to [min, max].
+        One of max or min must be given.
         """
-        raise NotImplementedError
+        return clip(self, min, max, out=out)
 
     def abs(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`abs`.
@@ -566,13 +503,13 @@ class _Symbol(Symbol):
         """
         raise AttributeError('_Symbol object has no attribute nanprod')
 
-    def mean(self, *args, **kwargs):
+    def mean(self, axis=None, dtype=None, out=None, keepdims=False):  # pylint: disable=arguments-differ
         """Convenience fluent method for :py:func:`mean`.
 
         The arguments are the same as for :py:func:`mean`, with
         this array as data.
         """
-        raise NotImplementedError
+        return _mx_np_op.mean(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out)
 
     def max(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`max`.
@@ -1031,11 +968,44 @@ def minimum(x1, x2, out=None):
 
 
 @set_module('mxnet.symbol.numpy')
+def add(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.add, _np.add, _npi.add_scalar, None, out)
+
+
+@set_module('mxnet.symbol.numpy')
+def subtract(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.subtract, _np.subtract, _npi.subtract_scalar,
+                         _npi.rsubtract_scalar, out)
+
+
+@set_module('mxnet.symbol.numpy')
+def multiply(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.multiply, _np.multiply, _npi.multiply_scalar, None, out)
+
+
+@set_module('mxnet.symbol.numpy')
+def divide(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.true_divide, _np.divide, _npi.true_divide_scalar,
+                         _npi.rtrue_divide_scalar, out)
+
+
+@set_module('mxnet.symbol.numpy')
+def mod(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.mod, _np.mod, _npi.mod_scalar, _npi.rmod_scalar, out)
+
+
+@set_module('mxnet.symbol.numpy')
+def power(x1, x2, out=None):
+    return _ufunc_helper(x1, x2, _npi.power, _np.power, _npi.power_scalar, _npi.rpower_scalar, out)
+
+
+@set_module('mxnet.symbol.numpy')
 def stack(arrays, axis=0, out=None):
     """Join a sequence of arrays along a new axis.
 
-        The axis parameter specifies the index of the new axis in the dimensions of the result.
-        For example, if `axis=0` it will be the first dimension and if `axis=-1` it will be the last dimension.
+    The axis parameter specifies the index of the new axis in the dimensions of the result.
+    For example, if `axis=0` it will be the first dimension and if `axis=-1` it will be the last
+    dimension.
 
     Parameters
     ----------
@@ -1161,4 +1131,46 @@ def argmax(a, axis=None, out=None):
     return _npi.argmax(a, axis=axis, keepdims=False, out=out)
 
 
+@set_module('mxnet.symbol.numpy')
+def clip(a, a_min, a_max, out=None):
+    """Clip (limit) the values in an array.
+
+    Given an interval, values outside the interval are clipped to
+    the interval edges.  For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Array containing elements to clip.
+    a_min : scalar or `None`
+        Minimum value. If `None`, clipping is not performed on lower
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    a_max : scalar or `None`
+        Maximum value. If `None`, clipping is not performed on upper
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    out : _Symbol, optional
+        The results will be placed in this array. It may be the input
+        array for in-place clipping.  `out` must be of the right shape
+        to hold the output.
+
+    Returns
+    -------
+    clipped_array : _Symbol
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+    """
+    if a_min is None and a_max is None:
+        raise ValueError('array_clip: must set either max or min')
+    if a_min is None:
+        a_min = float('-inf')
+    if a_max is None:
+        a_max = float('inf')
+    return _npi.clip(a, a_min, a_max, out=out)
+
+
 _set_np_symbol_class(_Symbol)
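
The _Symbol arithmetic mirrors the imperative side, so symbolic graphs can be
written with plain operators as well. A sketch:

    from mxnet import symbol
    from mxnet import numpy_extension as npx

    npx.set_np()
    x = symbol.var('x').as_np_ndarray()   # legacy Symbol -> numpy _Symbol
    y = (x + 1.0) ** 2 / 3.0              # routed through add/power/divide above
    print(type(y))                        # mxnet.symbol.numpy._Symbol
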
diff --git a/python/mxnet/symbol/register.py b/python/mxnet/symbol/register.py
index a835e2e..2bf3fbd 100644
--- a/python/mxnet/symbol/register.py
+++ b/python/mxnet/symbol/register.py
@@ -227,7 +227,13 @@ def %s(%s):"""%(func_name, ', '.join(signature)))
         _vals.append(%s)"""%(name, name, name))
             # dtype
             if dtype_name is not None:
-                code.append("""
+                if is_np_op:
+                    code.append("""
+    if %s is not _Null and %s is not None:
+        _keys.append('%s')
+        _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name, dtype_name))
+                else:
+                    code.append("""
     if %s is not _Null:
         _keys.append('%s')
         _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 96397f6..87893c4 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -68,6 +68,10 @@ class Symbol(SymbolBase):
         check_call(_LIB.MXShallowCopySymbol(self.handle, ctypes.byref(hdl)))
         return _Symbol(hdl)
 
+    def as_classic_ndarray(self):
+        """Returns self. For the convenience of conversion between legacy and np symbols."""
+        return self
+
     def __repr__(self):
         """Gets a string representation of the symbol."""
         name = self.name
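
A quick sketch of the intended symmetry (the inverse direction, `as_np_ndarray`, is exercised in the tests below); for a legacy `Symbol` this is simply the identity:

    import mxnet as mx

    data = mx.sym.var('data')
    assert data.as_classic_ndarray() is data  # returns self, no copy
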
diff --git a/python/mxnet/util.py b/python/mxnet/util.py
index 60c35bd..013a717 100644
--- a/python/mxnet/util.py
+++ b/python/mxnet/util.py
@@ -334,7 +334,7 @@ class _NumpyArrayScope(object):
     """
     _current = threading.local()
 
-    def __init__(self, is_np_array):  #pylint: disable=redefined-outer-name
+    def __init__(self, is_np_array):  # pylint: disable=redefined-outer-name
         self._old_scope = None
         self._is_np_array = is_np_array
 
@@ -545,3 +545,39 @@ def use_np(func):
         A function or class wrapped in the NumPy-shape and NumPy-array scope.
     """
     return use_np_array(use_np_shape(func))
+
+
+def set_np_array(active):
+    """Turns on/off NumPy array semantics for the current thread in which `mxnet.numpy.ndarray`
+    is expected to be created, instead of the legacy `mx.nd.NDArray`.
+
+    Parameters
+    ----------
+    active : bool
+        A boolean value indicating whether the NumPy-array semantics should be turned on or off.
+
+    Returns
+    -------
+        A bool value indicating the previous state of NumPy array semantics.
+    """
+    cur_state = is_np_array()
+    _NumpyArrayScope._current.value = _NumpyArrayScope(active)
+    return cur_state
+
+
+def set_np(shape=True, array=True):
+    """A convenience function for setting NumPy shape and array semantics at the same time.
+
+    Parameters
+    ----------
+    shape : bool
+        A boolean value indicating whether the NumPy-shape semantics should be turned on or off.
+    array : bool
+        A boolean value indicating whether the NumPy-array semantics should be turned on or off.
+
+    Returns
+    -------
+        A tuple with elements indicating the previous states of shape and array
+        semantics, respectively.
+    """
+    return set_np_shape(shape), set_np_array(array)
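
A minimal sketch of the new toggles, assuming they are called through `mxnet.util` (the commit also touches python/mxnet/numpy_extension/__init__.py, so they may be re-exported there as well):

    from mxnet.util import is_np_array, set_np, set_np_array

    prev_shape, prev_array = set_np()  # turn both semantics on
    assert is_np_array()
    set_np_array(prev_array)           # restore only the array semantics
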
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 5b6cece..3d668c8 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -154,6 +154,7 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
 
 
 MXNET_OPERATOR_REGISTER_UNARY(Activation)
+.add_alias("_npx_Activation")
 .describe(R"code(Applies an activation function element-wise to the input.
 
 The following activation functions are supported:
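
The `_npx_` prefix used here and in the registrations below is the naming convention for exposing a legacy op in the `mxnet.numpy_extension` (`npx`) namespace. A hedged sketch, assuming the registry strips the prefix so the op surfaces under its remaining name:

    from mxnet import np, npx

    x = np.array([[-1.0, 2.0]])
    y = npx.Activation(x, act_type='relu')  # assumed surfaced name
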
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 2564609..030f589 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -520,6 +520,7 @@ std::vector<nnvm::NodeEntry> BatchNormGrad(const nnvm::NodePtr& n,
 }
 
 NNVM_REGISTER_OP(BatchNorm)
+.add_alias("_npx_BatchNorm")
 .describe(R"code(Batch normalization.
 
 Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 536e9a7..6ab388a 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -397,6 +397,7 @@ struct ConvolutionGrad {
 };
 
 NNVM_REGISTER_OP(Convolution)
+.add_alias("_npx_Convolution")
 .describe(R"code(Compute *N*-D convolution on *(N+2)*-D input.
 
 In the 2-D convolution, given input data with shape *(batch_size,
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 27f6595..9f30ed2 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -244,6 +244,7 @@ DMLC_REGISTER_PARAMETER(FullyConnectedParam);
 
 NNVM_REGISTER_OP(FullyConnected)
 MXNET_ADD_SPARSE_OP_ALIAS(FullyConnected)
+.add_alias("_npx_FullyConnected")
 .describe(R"code(Applies a linear transformation: :math:`Y = XW^T + b`.
 
 If ``flatten`` is set to be true, then the shapes are:
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 41a486e..0df5827 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -364,7 +364,8 @@ inline static bool BackwardPoolingStorageType(const nnvm::NodeAttrs &attrs,
 DMLC_REGISTER_PARAMETER(PoolingParam);
 
 NNVM_REGISTER_OP(Pooling)
-    .describe(R"code(Performs pooling on the input.
+.add_alias("_npx_Pooling")
+.describe(R"code(Performs pooling on the input.
 
 The shapes for 1-D pooling are
 
diff --git a/src/operator/random/shuffle_op.cc b/src/operator/random/shuffle_op.cc
index 7031571..86797c1 100644
--- a/src/operator/random/shuffle_op.cc
+++ b/src/operator/random/shuffle_op.cc
@@ -122,6 +122,7 @@ void ShuffleForwardCPU(const nnvm::NodeAttrs& attrs,
 
 NNVM_REGISTER_OP(_shuffle)
 .add_alias("shuffle")
+.add_alias("_np__random_shuffle")
 .describe(R"code(Randomly shuffle the elements.
 
 This shuffles the array along the first axis.
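
The `_np__random_shuffle` alias follows the `_np_` scheme for the `mxnet.numpy` namespace, so the op should surface as `np.random.shuffle`. A hedged sketch (the in-place, first-axis behavior mirrors NumPy and the op description above):

    from mxnet import np

    x = np.array([[0., 1.], [2., 3.], [4., 5.]])
    np.random.shuffle(x)  # permutes the rows; each row stays intact
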
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index 6da384d..4594b48 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -1289,6 +1289,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd<msh
 // gamma
 MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(gamma, cpu, mshadow_op::gamma)
 MXNET_ADD_SPARSE_OP_ALIAS(gamma)
+.add_alias("_npx_gamma")
 .describe(R"code(Returns the gamma function (extension of the factorial function \
 to the reals), computed element-wise on the input array.
 
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index e78050a..0f059e2 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -696,6 +696,7 @@ NNVM_REGISTER_OP(_backward_slice_like)
 
 NNVM_REGISTER_OP(clip)
 MXNET_ADD_SPARSE_OP_ALIAS(clip)
+.add_alias("_npi_clip")
 .describe(R"code(Clips (limits) the values in an array.
 
 Given an interval, values outside the interval are clipped to the interval edges.
diff --git a/tests/python/unittest/test_numpy_gluon.py b/tests/python/unittest/test_numpy_gluon.py
index 0fcb874..b4db7bf 100644
--- a/tests/python/unittest/test_numpy_gluon.py
+++ b/tests/python/unittest/test_numpy_gluon.py
@@ -18,6 +18,7 @@
 # pylint: skip-file
 from __future__ import absolute_import
 from __future__ import division
+
 import mxnet as mx
 from mxnet import gluon, autograd, np, npx
 
@@ -61,8 +62,8 @@ def test_create_np_param():
     check_block_params(x.as_np_ndarray(), TestBlock2, True, np.ndarray)
 
 
+@npx.use_np
 def test_optimizer_with_np_ndarrays():
-    @npx.use_np
     class LinearRegression(gluon.HybridBlock):
         def __init__(self, num_input_dim=0, num_hidden_dim=100, num_output_dim=10):
             super(LinearRegression, self).__init__()
@@ -78,7 +79,6 @@ def test_optimizer_with_np_ndarrays():
             y_pred = h_relu.dot(w2)  # equivalent to F.np.dot(h_relu, w2)
             return y_pred
 
-    @npx.use_np
     class TotalLoss(gluon.HybridBlock):
         def hybrid_forward(self, F, pred, label):
             return ((pred - label) ** 2).sum()  # equivalent to F.np.sum(F.np.square(pred - label))
@@ -97,7 +97,7 @@ def test_optimizer_with_np_ndarrays():
 
     trainer = gluon.Trainer(regressor.collect_params(),
                             'sgd',
-                            {'learning_rate': 1e-3, 'momentum': 0.9, 'allow_np': True})
+                            {'learning_rate': 1e-3, 'momentum': 0.9})
 
     for t in range(5):
         with autograd.record():