Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/08/29 17:35:00 UTC

[incubator-mxnet] branch master updated: Refactor random linalg contrib namespaces (#7604)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 94a2c60  Refactor random linalg contrib namespaces (#7604)
94a2c60 is described below

commit 94a2c60e079505164a8928a5803ebe7106920e9f
Author: reminisce <wu...@gmail.com>
AuthorDate: Tue Aug 29 10:34:56 2017 -0700

    Refactor random linalg contrib namespaces (#7604)
    
    * Refactor namespaces contrib, linalg, random, and sparse for op registration
    
    Change examples in documentation
    
    Change namespace usage in examples
    
    Fix pylint
    
    Remove unused import
    
    Switch name and alias in linalg and random
    
    Change stype comparison from string to int for functions used internally
    
    Change documentation to use the right namespace
    
    Register ops under ndarray/op.py and symbol/op.py
    
    Remove unused import
    
    Change .cu op names
    
    * Add __all__ to ndarray and symbol modules
    
    * Revert "Add __all__ to ndarray and symbol modules"
    
    This reverts commit 8bc5de77bfdb40ff48dc570e2c6c49ec5d43ea64.
    
    * Add __all__ to ndarray and symbol modules
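
    For quick orientation, a minimal before/after sketch of the user-facing namespace
    change (the new-style calls mirror the example updates in the diff below; shapes and
    symbol names here are arbitrary):

        import mxnet as mx

        # old flat / contrib-first spellings replaced throughout the examples:
        #   weight = mx.nd.random_uniform(low=0, high=1, shape=(10, 5))
        #   loss   = mx.contrib.sym.ctc_loss(data=pred, label=label)

        # new submodule-based spellings introduced by this refactor
        weight = mx.nd.random.uniform(low=0, high=1, shape=(10, 5))
        noise = mx.nd.random.normal(0, 1, shape=(4, 3))

        pred = mx.sym.Variable('pred')
        label = mx.sym.Variable('label')
        loss = mx.sym.contrib.ctc_loss(data=pred, label=label)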
---
 benchmark/python/sparse/dot.py                     |   2 +-
 benchmark/python/sparse/sparse_op.py               |   6 +-
 example/ctc/lstm.py                                |   2 +-
 example/gluon/dcgan.py                             |   2 +-
 example/gluon/lstm_crf.py                          |   6 +-
 example/rcnn/rcnn/symbol/symbol_resnet.py          |   4 +-
 example/rcnn/rcnn/symbol/symbol_vgg.py             |   6 +-
 example/ssd/symbol/common.py                       |   5 +-
 example/ssd/symbol/legacy_vgg16_ssd_300.py         |   6 +-
 example/ssd/symbol/legacy_vgg16_ssd_512.py         |   6 +-
 example/ssd/symbol/symbol_builder.py               |   6 +-
 .../ssd/tools/caffe_converter/convert_symbol.py    |   4 +-
 python/mxnet/_ctypes/ndarray.py                    |  23 +--
 python/mxnet/autograd.py                           |   3 +-
 python/mxnet/base.py                               |  83 +++++++++
 python/mxnet/contrib/autograd.py                   |   3 +-
 python/mxnet/contrib/ndarray.py                    |   1 +
 python/mxnet/contrib/symbol.py                     |   1 +
 python/mxnet/gluon/block.py                        |   2 +-
 python/mxnet/gluon/rnn/rnn_layer.py                |  14 +-
 python/mxnet/ndarray/__init__.py                   |   8 +-
 python/mxnet/ndarray/_internal.py                  |   1 +
 .../mxnet/{symbol/sparse.py => ndarray/contrib.py} |   3 +-
 .../mxnet/{symbol/sparse.py => ndarray/linalg.py}  |   3 +-
 python/mxnet/ndarray/ndarray.py                    | 201 +++++++++++----------
 python/mxnet/ndarray/op.py                         |  49 +----
 .../mxnet/{symbol/sparse.py => ndarray/random.py}  |   3 +-
 python/mxnet/ndarray/sparse.py                     |  40 ++--
 python/mxnet/random.py                             |   8 +-
 python/mxnet/symbol/__init__.py                    |   7 +-
 python/mxnet/symbol/_internal.py                   |   1 +
 python/mxnet/symbol/{sparse.py => contrib.py}      |   3 +-
 python/mxnet/symbol/{sparse.py => linalg.py}       |   3 +-
 python/mxnet/symbol/op.py                          |  45 +----
 python/mxnet/symbol/{sparse.py => random.py}       |   3 +-
 python/mxnet/symbol/sparse.py                      |   1 +
 python/mxnet/symbol/symbol.py                      |  96 +++++-----
 src/operator/random/sample_op.cc                   |  49 ++---
 src/operator/random/sample_op.cu                   |   4 +-
 src/operator/tensor/la_op.cc                       |  49 ++---
 src/operator/tensor/la_op.cu                       |  14 +-
 src/operator/tensor/matrix_op.cc                   |   1 +
 tests/python/gpu/test_operator_gpu.py              |  20 +-
 tests/python/unittest/test_autograd.py             |   4 +-
 tests/python/unittest/test_gluon.py                |   2 +-
 tests/python/unittest/test_gluon_model_zoo.py      |   4 +-
 tests/python/unittest/test_module.py               |  28 +--
 tests/python/unittest/test_ndarray.py              |   2 +-
 tests/python/unittest/test_operator.py             |  56 +++---
 tests/python/unittest/test_random.py               |  14 +-
 tests/python/unittest/test_sparse_ndarray.py       |   4 +-
 51 files changed, 477 insertions(+), 434 deletions(-)

diff --git a/benchmark/python/sparse/dot.py b/benchmark/python/sparse/dot.py
index fe32282..aab34db 100644
--- a/benchmark/python/sparse/dot.py
+++ b/benchmark/python/sparse/dot.py
@@ -161,7 +161,7 @@ def _compare_sparse_dense(data_dir, file_name, mini_file_name, feature_dim,
         weight_row_dim = batch_size if transpose else feature_dim
         weight_shape = (weight_row_dim, output_dim)
         if not rsp:
-            weight = mx.nd.random_uniform(low=0, high=1, shape=weight_shape)
+            weight = mx.nd.random.uniform(low=0, high=1, shape=weight_shape)
         else:
             weight = rand_ndarray(weight_shape, "row_sparse", density=0.05, distribution="uniform")
         total_cost = {}
diff --git a/benchmark/python/sparse/sparse_op.py b/benchmark/python/sparse/sparse_op.py
index 0683aa8..ebe62af 100644
--- a/benchmark/python/sparse/sparse_op.py
+++ b/benchmark/python/sparse/sparse_op.py
@@ -101,7 +101,7 @@ def test_dot_real(data_dict):
         # model
         data_shape = (k, )
         train_iter = get_iter(mini_path, data_shape, batch_size)
-        weight = mx.nd.random_uniform(low=0, high=1, shape=(k, m))
+        weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m))
 
         csr_data = []
         dns_data = []
@@ -154,7 +154,7 @@ def test_dot_synthetic():
 
     def bench_dot_forward(m, k, n, density, ctx, repeat):
         set_default_context(ctx)
-        dns = mx.nd.random_uniform(shape=(k, n)).copyto(ctx)
+        dns = mx.nd.random.uniform(shape=(k, n)).copyto(ctx)
         data_shape = (m, k)
         csr_data = rand_ndarray(data_shape, 'csr', density)
         dns_data = csr_data.tostype('default')
@@ -183,7 +183,7 @@ def test_dot_synthetic():
 
     def bench_dot_backward(m, k, n, density, ctx, repeat):
         set_default_context(ctx)
-        dns = mx.nd.random_uniform(shape=(m, n)).copyto(ctx)
+        dns = mx.nd.random.uniform(shape=(m, n)).copyto(ctx)
         data_shape = (m, k)
         csr_data = rand_ndarray(data_shape, 'csr', density)
         dns_data = csr_data.tostype('default')
diff --git a/example/ctc/lstm.py b/example/ctc/lstm.py
index 7e18c86..326daa1 100644
--- a/example/ctc/lstm.py
+++ b/example/ctc/lstm.py
@@ -96,7 +96,7 @@ def lstm_unroll(num_lstm_layer, seq_len,
     pred_fc = mx.sym.FullyConnected(data=hidden_concat, num_hidden=11)
     pred_ctc = mx.sym.Reshape(data=pred_fc, shape=(-4, seq_len, -1, 0))
 
-    loss = mx.contrib.sym.ctc_loss(data=pred_ctc, label=label)
+    loss = mx.sym.contrib.ctc_loss(data=pred_ctc, label=label)
     ctc_loss = mx.sym.MakeLoss(loss)
 
     softmax_class = mx.symbol.SoftmaxActivation(data=pred_fc)
diff --git a/example/gluon/dcgan.py b/example/gluon/dcgan.py
index ed814df..3233f43 100644
--- a/example/gluon/dcgan.py
+++ b/example/gluon/dcgan.py
@@ -184,7 +184,7 @@ for epoch in range(opt.nepoch):
         ###########################
         # train with real_t
         data = data.as_in_context(ctx)
-        noise = mx.nd.random_normal(0, 1, shape=(opt.batch_size, nz, 1, 1), ctx=ctx)
+        noise = mx.nd.random.normal(0, 1, shape=(opt.batch_size, nz, 1, 1), ctx=ctx)
 
         with autograd.record():
             output = netD(data)
diff --git a/example/gluon/lstm_crf.py b/example/gluon/lstm_crf.py
index 40c8c2b..857bfca 100644
--- a/example/gluon/lstm_crf.py
+++ b/example/gluon/lstm_crf.py
@@ -62,13 +62,13 @@ class BiLSTM_CRF(Block):
 
             # Matrix of transition parameters.  Entry i,j is the score of
             # transitioning *to* i *from* j.
-            self.transitions = nd.random_normal(shape=(self.tagset_size, self.tagset_size))
+            self.transitions = nd.random.normal(shape=(self.tagset_size, self.tagset_size))
 
             self.hidden = self.init_hidden()
 
     def init_hidden(self):
-        return [nd.random_normal(shape=(2, 1, self.hidden_dim // 2)),
-                nd.random_normal(shape=(2, 1, self.hidden_dim // 2))]
+        return [nd.random.normal(shape=(2, 1, self.hidden_dim // 2)),
+                nd.random.normal(shape=(2, 1, self.hidden_dim // 2))]
 
     def _forward_alg(self, feats):
         # Do the forward algorithm to compute the partition function
diff --git a/example/rcnn/rcnn/symbol/symbol_resnet.py b/example/rcnn/rcnn/symbol/symbol_resnet.py
index f914d11..4a9677d 100644
--- a/example/rcnn/rcnn/symbol/symbol_resnet.py
+++ b/example/rcnn/rcnn/symbol/symbol_resnet.py
@@ -113,7 +113,7 @@ def get_resnet_train(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCH
     rpn_cls_act_reshape = mx.symbol.Reshape(
         data=rpn_cls_act, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_act_reshape')
     if config.TRAIN.CXX_PROPOSAL:
-        rois = mx.contrib.symbol.Proposal(
+        rois = mx.symbol.contrib.Proposal(
             cls_prob=rpn_cls_act_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
             feature_stride=config.RPN_FEAT_STRIDE, scales=tuple(config.ANCHOR_SCALES), ratios=tuple(config.ANCHOR_RATIOS),
             rpn_pre_nms_top_n=config.TRAIN.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=config.TRAIN.RPN_POST_NMS_TOP_N,
@@ -189,7 +189,7 @@ def get_resnet_test(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHO
     rpn_cls_prob_reshape = mx.symbol.Reshape(
         data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape')
     if config.TEST.CXX_PROPOSAL:
-        rois = mx.contrib.symbol.Proposal(
+        rois = mx.symbol.contrib.Proposal(
             cls_prob=rpn_cls_prob_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
             feature_stride=config.RPN_FEAT_STRIDE, scales=tuple(config.ANCHOR_SCALES), ratios=tuple(config.ANCHOR_RATIOS),
             rpn_pre_nms_top_n=config.TEST.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=config.TEST.RPN_POST_NMS_TOP_N,
diff --git a/example/rcnn/rcnn/symbol/symbol_vgg.py b/example/rcnn/rcnn/symbol/symbol_vgg.py
index f04ba89..00ba15e 100644
--- a/example/rcnn/rcnn/symbol/symbol_vgg.py
+++ b/example/rcnn/rcnn/symbol/symbol_vgg.py
@@ -242,7 +242,7 @@ def get_vgg_rpn_test(num_anchors=config.NUM_ANCHORS):
     rpn_cls_prob_reshape = mx.symbol.Reshape(
         data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape')
     if config.TEST.CXX_PROPOSAL:
-        group = mx.contrib.symbol.Proposal(
+        group = mx.symbol.contrib.Proposal(
             cls_prob=rpn_cls_prob_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois', output_score=True,
             feature_stride=config.RPN_FEAT_STRIDE, scales=tuple(config.ANCHOR_SCALES), ratios=tuple(config.ANCHOR_RATIOS),
             rpn_pre_nms_top_n=config.TEST.PROPOSAL_PRE_NMS_TOP_N, rpn_post_nms_top_n=config.TEST.PROPOSAL_POST_NMS_TOP_N,
@@ -290,7 +290,7 @@ def get_vgg_test(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
     rpn_cls_prob_reshape = mx.symbol.Reshape(
         data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape')
     if config.TEST.CXX_PROPOSAL:
-        rois = mx.contrib.symbol.Proposal(
+        rois = mx.symbol.contrib.Proposal(
             cls_prob=rpn_cls_prob_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
             feature_stride=config.RPN_FEAT_STRIDE, scales=tuple(config.ANCHOR_SCALES), ratios=tuple(config.ANCHOR_RATIOS),
             rpn_pre_nms_top_n=config.TEST.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=config.TEST.RPN_POST_NMS_TOP_N,
@@ -373,7 +373,7 @@ def get_vgg_train(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS
     rpn_cls_act_reshape = mx.symbol.Reshape(
         data=rpn_cls_act, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_act_reshape')
     if config.TRAIN.CXX_PROPOSAL:
-        rois = mx.contrib.symbol.Proposal(
+        rois = mx.symbol.contrib.Proposal(
             cls_prob=rpn_cls_act_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
             feature_stride=config.RPN_FEAT_STRIDE, scales=tuple(config.ANCHOR_SCALES), ratios=tuple(config.ANCHOR_RATIOS),
             rpn_pre_nms_top_n=config.TRAIN.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=config.TRAIN.RPN_POST_NMS_TOP_N,
diff --git a/example/ssd/symbol/common.py b/example/ssd/symbol/common.py
index ea58c15..4a0458f 100644
--- a/example/ssd/symbol/common.py
+++ b/example/ssd/symbol/common.py
@@ -283,8 +283,9 @@ def multibox_layer(from_layers, num_classes, sizes=[.2, .95],
             step = (steps[k], steps[k])
         else:
             step = '(-1.0, -1.0)'
-        anchors = mx.contrib.symbol.MultiBoxPrior(from_layer, sizes=size_str, ratios=ratio_str, \
-            clip=clip, name="{}_anchors".format(from_name), steps=step)
+        anchors = mx.symbol.contrib.MultiBoxPrior(from_layer, sizes=size_str, ratios=ratio_str,
+                                                  clip=clip, name="{}_anchors".format(from_name),
+                                                  steps=step)
         anchors = mx.symbol.Flatten(data=anchors)
         anchor_layers.append(anchors)
 
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_300.py b/example/ssd/symbol/legacy_vgg16_ssd_300.py
index c1f8ea7..29fc30b 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_300.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_300.py
@@ -144,7 +144,7 @@ def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
         num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
         num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
 
-    tmp = mx.contrib.symbol.MultiBoxTarget(
+    tmp = mx.symbol.contrib.MultiBoxTarget(
         *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
         ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
         negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
@@ -163,7 +163,7 @@ def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
 
     # monitoring training status
     cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
-    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    det = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
@@ -202,7 +202,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False,
 
     cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
         name='cls_prob')
-    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     return out
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py b/example/ssd/symbol/legacy_vgg16_ssd_512.py
index 6cc3aa2..c5c3095 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_512.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py
@@ -148,7 +148,7 @@ def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_t
         num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
         num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
 
-    tmp = mx.contrib.symbol.MultiBoxTarget(
+    tmp = mx.symbol.contrib.MultiBoxTarget(
         *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
         ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
         negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
@@ -167,7 +167,7 @@ def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_t
 
     # monitoring training status
     cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
-    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    det = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
@@ -205,7 +205,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=40
 
     cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
         name='cls_prob')
-    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     return out
diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py
index 4cd7f88..0c7b5c1 100644
--- a/example/ssd/symbol/symbol_builder.py
+++ b/example/ssd/symbol/symbol_builder.py
@@ -87,7 +87,7 @@ def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pa
         num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
         num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
 
-    tmp = mx.contrib.symbol.MultiBoxTarget(
+    tmp = mx.symbol.contrib.MultiBoxTarget(
         *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
         ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
         negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
@@ -106,7 +106,7 @@ def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pa
 
     # monitoring training status
     cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
-    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    det = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
@@ -177,7 +177,7 @@ def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
 
     cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
         name='cls_prob')
-    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+    out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
         name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
         variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
     return out
diff --git a/example/ssd/tools/caffe_converter/convert_symbol.py b/example/ssd/tools/caffe_converter/convert_symbol.py
index 10510aa..5ce7023 100644
--- a/example/ssd/tools/caffe_converter/convert_symbol.py
+++ b/example/ssd/tools/caffe_converter/convert_symbol.py
@@ -265,7 +265,7 @@ def proto2script(proto_file):
             finput_dim = float(input_dim[2])
             step = '(%f, %f)' % (step_h / finput_dim, step_w / finput_dim)
             assert param.offset == 0.5, "currently only support offset = 0.5"
-            symbol_string += '%s = mx.contrib.symbol.MultiBoxPrior(%s, sizes=%s, ratios=%s, clip=%s, steps=%s, name="%s")\n' % \
+            symbol_string += '%s = mx.symbol.contrib.MultiBoxPrior(%s, sizes=%s, ratios=%s, clip=%s, steps=%s, name="%s")\n' % \
                 (name, mapping[layer[i].bottom[0]], sizes, ratios_string, clip, step, name)
             symbol_string += '%s = mx.symbol.Flatten(data=%s)\n' % (name, name)
             type_string = 'split'
@@ -281,7 +281,7 @@ def proto2script(proto_file):
             assert param.share_location == True
             assert param.background_label_id == 0
             nms_param = param.nms_param
-            type_string = 'mx.contrib.symbol.MultiBoxDetection'
+            type_string = 'mx.symbol.contrib.MultiBoxDetection'
             param_string = "nms_threshold=%f, nms_topk=%d" % \
                 (nms_param.nms_threshold, nms_param.top_k)
         if type_string == '':
diff --git a/python/mxnet/_ctypes/ndarray.py b/python/mxnet/_ctypes/ndarray.py
index c2e6fce..0d02c04 100644
--- a/python/mxnet/_ctypes/ndarray.py
+++ b/python/mxnet/_ctypes/ndarray.py
@@ -22,22 +22,11 @@
 from __future__ import absolute_import as _abs
 
 import ctypes
-import sys as _sys
-import numpy as np
 
 from ..base import _LIB
-from ..base import c_array, py_str, c_str, mx_uint, _Null
-from ..base import NDArrayHandle, OpHandle, CachedOpHandle
+from ..base import c_array, c_str
+from ..base import NDArrayHandle, CachedOpHandle
 from ..base import check_call
-from ..ndarray_doc import _build_doc
-
-
-_STORAGE_TYPE_ID_TO_STR = {
-    -1 : 'undefined',
-    0  : 'default',
-    1  : 'row_sparse',
-    2  : 'csr',
-}
 
 
 class NDArrayBase(object):
@@ -106,10 +95,10 @@ def _imperative_invoke(handle, ndargs, keys, vals, out):
         return original_output
     if num_output.value == 1:
         return _ndarray_cls(ctypes.cast(output_vars[0], NDArrayHandle),
-                            stype=_STORAGE_TYPE_ID_TO_STR[out_stypes[0]])
+                            stype=out_stypes[0])
     else:
         return [_ndarray_cls(ctypes.cast(output_vars[i], NDArrayHandle),
-                             stype=_STORAGE_TYPE_ID_TO_STR[out_stypes[i]])
+                             stype=out_stypes[i])
                 for i in range(num_output.value)]
 
 
@@ -160,8 +149,8 @@ class CachedOp(object):
             return original_output
         if num_output.value == 1:
             return _ndarray_cls(ctypes.cast(output_vars[0], NDArrayHandle),
-                                stype=_STORAGE_TYPE_ID_TO_STR[out_stypes[0]])
+                                stype=out_stypes[0])
         else:
             return [_ndarray_cls(ctypes.cast(output_vars[i], NDArrayHandle),
-                                 stype=_STORAGE_TYPE_ID_TO_STR[out_stypes[i]])
+                                 stype=out_stypes[i])
                     for i in range(num_output.value)]
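
    These hunks, together with the storage-type constants added in ndarray.py further
    down, change internal code to pass raw integer storage-type codes instead of strings;
    only user-facing surfaces such as the NDArray.stype property convert back to the
    string form. A minimal standalone sketch of that mapping (the constants mirror the
    ones added in ndarray.py; _is_sparse is a hypothetical helper shown only for
    illustration):

        _STORAGE_TYPE_UNDEFINED = -1
        _STORAGE_TYPE_DEFAULT = 0
        _STORAGE_TYPE_ROW_SPARSE = 1
        _STORAGE_TYPE_CSR = 2

        _STORAGE_TYPE_ID_TO_STR = {
            _STORAGE_TYPE_UNDEFINED: 'undefined',
            _STORAGE_TYPE_DEFAULT: 'default',
            _STORAGE_TYPE_ROW_SPARSE: 'row_sparse',
            _STORAGE_TYPE_CSR: 'csr',
        }

        # internal code now compares the raw integer code directly ...
        def _is_sparse(stype_id):
            return stype_id in (_STORAGE_TYPE_ROW_SPARSE, _STORAGE_TYPE_CSR)

        # ... while the public API still reports the string name
        print(_STORAGE_TYPE_ID_TO_STR[_STORAGE_TYPE_CSR])  # 'csr'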
diff --git a/python/mxnet/autograd.py b/python/mxnet/autograd.py
index 292bcc2..bca1dc2 100644
--- a/python/mxnet/autograd.py
+++ b/python/mxnet/autograd.py
@@ -27,7 +27,8 @@ from ctypes import c_int, c_void_p, CFUNCTYPE, POINTER, cast
 from .base import _LIB, check_call, string_types
 from .base import mx_uint, NDArrayHandle, c_array, MXCallbackList, SymbolHandle
 from .ndarray import NDArray
-from .symbol import _GRAD_REQ_MAP, Symbol
+from .ndarray import _GRAD_REQ_MAP
+from .symbol import Symbol
 
 
 def set_recording(is_recording): #pylint: disable=redefined-outer-name
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index d446355..e422dad 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -307,6 +307,7 @@ def _notify_shutdown():
 
 atexit.register(_notify_shutdown)
 
+
 def add_fileline_to_docstring(module, incursive=True):
     """Append the definition position to each function contained in module.
 
@@ -342,6 +343,7 @@ def add_fileline_to_docstring(module, incursive=True):
         if inspect.isclass(obj) and incursive:
             add_fileline_to_docstring(obj, False)
 
+
 def _as_list(obj):
     """A utility function that converts the argument to a list if it is not already.
 
@@ -359,3 +361,84 @@ def _as_list(obj):
         return obj
     else:
         return [obj]
+
+
+_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_random_', '_sparse_']
+
+
+def _get_op_name_prefix(op_name):
+    """
+    Check whether the given op_name starts with any words in `_OP_NAME_PREFIX_LIST`.
+    If found, return the prefix; else, return an empty string.
+    """
+    for prefix in _OP_NAME_PREFIX_LIST:
+        if op_name.startswith(prefix):
+            return prefix
+    return ""
+
+
+# pylint: enable=too-many-locals, invalid-name
+def _init_op_module(root_namespace, module_name, make_op_func):
+    """
+    Registers op functions created by `make_op_func` under
+    `root_namespace.module_name.[submodule_name]`,
+    where `submodule_name` is one of `_OP_SUBMODULE_NAME_LIST`.
+
+    Parameters
+    ----------
+    root_namespace : str
+        Top level module name, `mxnet` in the current cases.
+    module_name : str
+        Second level module name, `ndarray` and `symbol` in the current cases.
+    make_op_func : str
+        Function for creating op functions for `ndarray` and `symbol` modules.
+    """
+    plist = ctypes.POINTER(ctypes.c_char_p)()
+    size = ctypes.c_uint()
+
+    check_call(_LIB.MXListAllOpNames(ctypes.byref(size),
+                                     ctypes.byref(plist)))
+    op_names = []
+    for i in range(size.value):
+        op_names.append(py_str(plist[i]))
+
+    module_op = sys.modules["%s.%s.op" % (root_namespace, module_name)]
+    module_internal = sys.modules["%s.%s._internal" % (root_namespace, module_name)]
+    # contrib module in the old format (deprecated)
+    # kept here for backward compatibility
+    # use mx.nd.contrib or mx.sym.contrib from now on
+    contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name)
+    contrib_module_old = sys.modules[contrib_module_name_old]
+    submodule_dict = {}
+    for op_name_prefix in _OP_NAME_PREFIX_LIST:
+        submodule_dict[op_name_prefix] =\
+            sys.modules["%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])]
+    for name in op_names:
+        hdl = OpHandle()
+        check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
+        function = make_op_func(hdl, name)
+        op_name_prefix = _get_op_name_prefix(function.__name__)
+        if len(op_name_prefix) > 0:
+            # register op under mxnet.module_name.op_name_prefix[1:-1]
+            # e.g. mxnet.ndarray.sparse.dot, mxnet.symbol.linalg.gemm
+            function.__name__ = function.__name__[len(op_name_prefix):]
+            function.__module__ = "%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])
+            cur_module = submodule_dict[op_name_prefix]
+            setattr(cur_module, function.__name__, function)
+            cur_module.__all__.append(function.__name__)
+            # if op_name_prefix is '_contrib_', also need to register
+            # the op under mxnet.contrib.module_name for backward compatibility
+            if op_name_prefix == '_contrib_':
+                hdl = OpHandle()
+                check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
+                function = make_op_func(hdl, name)
+                function.__name__ = function.__name__[len(op_name_prefix):]
+                function.__module__ = contrib_module_name_old
+                setattr(contrib_module_old, function.__name__, function)
+                contrib_module_old.__all__.append(function.__name__)
+        elif function.__name__.startswith('_'):
+            setattr(module_internal, function.__name__, function)
+            module_internal.__all__.append(function.__name__)
+        else:
+            setattr(module_op, function.__name__, function)
+            module_op.__all__.append(function.__name__)
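
    The prefix-based routing in _init_op_module above is what places each backend op into
    the new Python submodules. A self-contained sketch of just that mapping, using the
    example names from the code comment (exposed_name is an illustrative helper, not part
    of the codebase):

        _OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_random_', '_sparse_']

        def _get_op_name_prefix(op_name):
            """Return the matching submodule prefix, or '' if there is none."""
            for prefix in _OP_NAME_PREFIX_LIST:
                if op_name.startswith(prefix):
                    return prefix
            return ""

        def exposed_name(op_name, module_name='ndarray', root='mxnet'):
            """Compute the Python module path an op would be registered under."""
            prefix = _get_op_name_prefix(op_name)
            if prefix:
                # e.g. '_sparse_dot' -> mxnet.ndarray.sparse.dot
                return "%s.%s.%s.%s" % (root, module_name, prefix[1:-1], op_name[len(prefix):])
            if op_name.startswith('_'):
                # other underscore-prefixed ops stay in the _internal module
                return "%s.%s._internal.%s" % (root, module_name, op_name)
            return "%s.%s.op.%s" % (root, module_name, op_name)

        print(exposed_name('_sparse_dot'))             # mxnet.ndarray.sparse.dot
        print(exposed_name('_linalg_gemm', 'symbol'))  # mxnet.symbol.linalg.gemm
        print(exposed_name('dot'))                     # mxnet.ndarray.op.dot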
diff --git a/python/mxnet/contrib/autograd.py b/python/mxnet/contrib/autograd.py
index 2d2500e..68ce31b 100644
--- a/python/mxnet/contrib/autograd.py
+++ b/python/mxnet/contrib/autograd.py
@@ -25,8 +25,7 @@ import functools
 from ..base import _LIB, check_call, string_types
 from ..base import mx_uint, NDArrayHandle, c_array
 # pylint: disable= unused-import
-from ..ndarray import NDArray, zeros_like
-from ..symbol import _GRAD_REQ_MAP
+from ..ndarray import NDArray, zeros_like, _GRAD_REQ_MAP
 
 
 def set_is_training(is_train):
diff --git a/python/mxnet/contrib/ndarray.py b/python/mxnet/contrib/ndarray.py
index 3c86fe7..f65c75e 100644
--- a/python/mxnet/contrib/ndarray.py
+++ b/python/mxnet/contrib/ndarray.py
@@ -17,3 +17,4 @@
 
 # coding: utf-8
 """NDArray namespace used to register contrib functions"""
+__all__ = []
diff --git a/python/mxnet/contrib/symbol.py b/python/mxnet/contrib/symbol.py
index 1d53345..90f6dae 100644
--- a/python/mxnet/contrib/symbol.py
+++ b/python/mxnet/contrib/symbol.py
@@ -17,3 +17,4 @@
 
 # coding: utf-8
 """Symbol namespace used to register contrib functions"""
+__all__ = []
diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index 74a9058..d611481 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -459,7 +459,7 @@ class SymbolBlock(HybridBlock):
                    internals['model_dense1_relu_fwd_output']]
     >>> # Create SymbolBlock that shares parameters with alexnet
     >>> feat_model = gluon.SymbolBlock(outputs, inputs, params=alexnet.collect_params())
-    >>> x = mx.nd.random_normal(shape=(16, 3, 224, 224))
+    >>> x = mx.nd.random.normal(shape=(16, 3, 224, 224))
     >>> print(feat_model(x))
     """
     def __init__(self, outputs, inputs, params=None):
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index 063d566..b280752 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -301,11 +301,11 @@ class RNN(_RNNLayer):
     --------
     >>> layer = mx.gluon.rnn.RNN(100, 3)
     >>> layer.initialize()
-    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
+    >>> input = mx.nd.random.uniform(shape=(5, 3, 10))
     >>> # by default zeros are used as begin state
     >>> output = layer(input)
     >>> # manually specify begin state.
-    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
+    >>> h0 = mx.nd.random.uniform(shape=(3, 3, 100))
     >>> output, hn = layer(input, h0)
     """
     def __init__(self, hidden_size, num_layers=1, activation='relu',
@@ -404,12 +404,12 @@ class LSTM(_RNNLayer):
     --------
     >>> layer = mx.gluon.rnn.LSTM(100, 3)
     >>> layer.initialize()
-    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
+    >>> input = mx.nd.random.uniform(shape=(5, 3, 10))
     >>> # by default zeros are used as begin state
     >>> output = layer(input)
     >>> # manually specify begin state.
-    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
-    >>> c0 = mx.nd.random_uniform(shape=(3, 3, 100))
+    >>> h0 = mx.nd.random.uniform(shape=(3, 3, 100))
+    >>> c0 = mx.nd.random.uniform(shape=(3, 3, 100))
     >>> output, hn = layer(input, [h0, c0])
     """
     def __init__(self, hidden_size, num_layers=1, layout='TNC',
@@ -503,11 +503,11 @@ class GRU(_RNNLayer):
     --------
     >>> layer = mx.gluon.rnn.GRU(100, 3)
     >>> layer.initialize()
-    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
+    >>> input = mx.nd.random.uniform(shape=(5, 3, 10))
     >>> # by default zeros are used as begin state
     >>> output = layer(input)
     >>> # manually specify begin state.
-    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
+    >>> h0 = mx.nd.random.uniform(shape=(3, 3, 100))
     >>> output, hn = layer(input, h0)
     """
     def __init__(self, hidden_size, num_layers=1, layout='TNC',
diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py
index 6322078..43ec961 100644
--- a/python/mxnet/ndarray/__init__.py
+++ b/python/mxnet/ndarray/__init__.py
@@ -17,9 +17,13 @@
 
 """NDArray API of MXNet."""
 
-from . import _internal, sparse, op
-from .op import CachedOp
+from . import _internal, contrib, linalg, random, sparse
 # pylint: disable=wildcard-import, redefined-builtin
+from .op import *
 from .ndarray import *
+# pylint: enable=wildcard-import
 from .utils import load, save, zeros, empty, array
 from .sparse import _ndarray_cls
+from .ndarray import _GRAD_REQ_MAP
+
+__all__ = op.__all__ + ndarray.__all__ + ['contrib', 'linalg', 'random', 'sparse']
diff --git a/python/mxnet/ndarray/_internal.py b/python/mxnet/ndarray/_internal.py
index 8f151f1..96eff31 100644
--- a/python/mxnet/ndarray/_internal.py
+++ b/python/mxnet/ndarray/_internal.py
@@ -16,3 +16,4 @@
 # under the License.
 
 """NDArray namespace used to register internal functions."""
+__all__ = []
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/ndarray/contrib.py
similarity index 94%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/ndarray/contrib.py
index 1d94f2b..f21d144 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/ndarray/contrib.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Contrib NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/ndarray/linalg.py
similarity index 93%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/ndarray/linalg.py
index 1d94f2b..0c8e7fd 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/ndarray/linalg.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Linear Algebra NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index a85ccb5..b0500d3 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -37,16 +37,8 @@ from ..base import mx_uint, NDArrayHandle, check_call
 from ..base import ctypes2buffer
 from ..context import Context
 from . import _internal
-from .op import NDArrayBase, _STORAGE_TYPE_ID_TO_STR
-from . import cast_storage
-from . import broadcast_add, broadcast_mul, transpose, broadcast_not_equal, broadcast_power
-from . import broadcast_sub, broadcast_div, broadcast_to, broadcast_axes, broadcast_equal
-from . import broadcast_greater, broadcast_greater_equal, broadcast_lesser, broadcast_lesser_equal
-from . import zeros_like, ones_like, broadcast_minimum, broadcast_maximum, broadcast_mod
-from . import flatten, norm, rint, fix, floor, ceil, split, slice_axis, one_hot, pick, take
-from . import trunc, expand_dims, flip, tile, repeat, pad, clip, sign
-from . import nansum, prod, nanprod, mean, sort, topk, argsort, argmax, argmin
-from . import sum, round, max, min, slice, abs # pylint: disable=redefined-builtin
+from . import op
+from .op import NDArrayBase
 
 __all__ = ["NDArray", "concatenate", "_DTYPE_NP_TO_MX", "_DTYPE_MX_TO_NP", "_GRAD_REQ_MAP",
            "ones", "add", "arange", "divide", "equal", "full", "greater", "greater_equal",
@@ -54,33 +46,48 @@ __all__ = ["NDArray", "concatenate", "_DTYPE_NP_TO_MX", "_DTYPE_MX_TO_NP", "_GRA
            "multiply", "negative", "not_equal", "onehot_encode", "power", "subtract",
            "true_divide", "waitall", "_new_empty_handle"]
 
+_STORAGE_TYPE_UNDEFINED = -1
+_STORAGE_TYPE_DEFAULT = 0
+_STORAGE_TYPE_ROW_SPARSE = 1
+_STORAGE_TYPE_CSR = 2
+
 # pylint: disable= no-member
 _DTYPE_NP_TO_MX = {
-    None       : -1,
-    np.float32 : 0,
-    np.float64 : 1,
-    np.float16 : 2,
-    np.uint8   : 3,
-    np.int32   : 4,
-    np.int8    : 5,
-    np.int64   : 6,
+    None: -1,
+    np.float32: 0,
+    np.float64: 1,
+    np.float16: 2,
+    np.uint8: 3,
+    np.int32: 4,
+    np.int8: 5,
+    np.int64: 6,
 }
+
 _DTYPE_MX_TO_NP = {
-    -1 : None,
-    0 : np.float32,
-    1 : np.float64,
-    2 : np.float16,
-    3 : np.uint8,
-    4 : np.int32,
-    5 : np.int8,
-    6 : np.int64,
+    -1: None,
+    0: np.float32,
+    1: np.float64,
+    2: np.float16,
+    3: np.uint8,
+    4: np.int32,
+    5: np.int8,
+    6: np.int64,
 }
+
 _STORAGE_TYPE_STR_TO_ID = {
-    'undefined'  : -1,
-    'default'    : 0,
-    'row_sparse' : 1,
-    'csr'        : 2,
+    'undefined': _STORAGE_TYPE_UNDEFINED,
+    'default': _STORAGE_TYPE_DEFAULT,
+    'row_sparse': _STORAGE_TYPE_ROW_SPARSE,
+    'csr': _STORAGE_TYPE_CSR,
 }
+
+_STORAGE_TYPE_ID_TO_STR = {
+    _STORAGE_TYPE_UNDEFINED: 'undefined',
+    _STORAGE_TYPE_DEFAULT: 'default',
+    _STORAGE_TYPE_ROW_SPARSE: 'row_sparse',
+    _STORAGE_TYPE_CSR: 'csr',
+}
+
 _GRAD_REQ_MAP = {
     'null': 0,
     'write': 1,
@@ -137,7 +144,7 @@ def waitall():
 def _storage_type(handle):
     storage_type = ctypes.c_int(0)
     check_call(_LIB.MXNDArrayGetStorageType(handle, ctypes.byref(storage_type)))
-    return _STORAGE_TYPE_ID_TO_STR[storage_type.value]
+    return storage_type.value
 
 
 class NDArray(NDArrayBase):
@@ -169,7 +176,7 @@ fixed-size items.
         if not self.writable:
             raise ValueError('trying to add to a readonly NDArray')
         if isinstance(other, NDArray):
-            return broadcast_add(self, other, out=self)
+            return op.broadcast_add(self, other, out=self)
         elif isinstance(other, numeric_types):
             return _internal._plus_scalar(self, float(other), out=self)
         else:
@@ -187,7 +194,7 @@ fixed-size items.
         if not self.writable:
             raise ValueError('trying to subtract from a readonly NDArray')
         if isinstance(other, NDArray):
-            return broadcast_sub(self, other, out=self)
+            return op.broadcast_sub(self, other, out=self)
         elif isinstance(other, numeric_types):
             return _internal._minus_scalar(self, float(other), out=self)
         else:
@@ -210,7 +217,7 @@ fixed-size items.
         if not self.writable:
             raise ValueError('trying to multiply to a readonly NDArray')
         if isinstance(other, NDArray):
-            return broadcast_mul(self, other, out=self)
+            return op.broadcast_mul(self, other, out=self)
         elif isinstance(other, numeric_types):
             return _internal._mul_scalar(self, float(other), out=self)
         else:
@@ -232,7 +239,7 @@ fixed-size items.
         if not self.writable:
             raise ValueError('trying to divide from a readonly NDArray')
         if isinstance(other, NDArray):
-            return broadcast_div(self, other, out=self)
+            return op.broadcast_div(self, other, out=self)
         elif isinstance(other, numeric_types):
             return _internal._div_scalar(self, float(other), out=self)
         else:
@@ -260,7 +267,7 @@ fixed-size items.
         if not self.writable:
             raise ValueError('trying to take modulo from a readonly NDArray')
         if isinstance(other, NDArray):
-            return broadcast_mod(self, other, out=self)
+            return op.broadcast_mod(self, other, out=self)
         elif isinstance(other, numeric_types):
             return _internal._mod_scalar(self, float(other), out=self)
         else:
@@ -525,7 +532,7 @@ fixed-size items.
             oshape.extend(shape[i+1:])
             if len(oshape) == 0:
                 oshape.append(1)
-            return slice(self, begin, end).reshape(oshape)
+            return op.slice(self, begin, end).reshape(oshape)
         else:
             raise ValueError(
                 "NDArray does not support slicing with key %s of type %s."%(
@@ -684,7 +691,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`zeros_like`, with
         this array as data.
         """
-        return zeros_like(self, *args, **kwargs)
+        return op.zeros_like(self, *args, **kwargs)
 
     def ones_like(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`ones_like`.
@@ -692,7 +699,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`ones_like`, with
         this array as data.
         """
-        return ones_like(self, *args, **kwargs)
+        return op.ones_like(self, *args, **kwargs)
 
     def broadcast_axes(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`broadcast_axes`.
@@ -700,7 +707,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`broadcast_axes`, with
         this array as data.
         """
-        return broadcast_axes(self, *args, **kwargs)
+        return op.broadcast_axes(self, *args, **kwargs)
 
     def repeat(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`repeat`.
@@ -708,7 +715,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`repeat`, with
         this array as data.
         """
-        return repeat(self, *args, **kwargs)
+        return op.repeat(self, *args, **kwargs)
 
     def pad(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`pad`.
@@ -716,7 +723,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`pad`, with
         this array as data.
         """
-        return pad(self, *args, **kwargs)
+        return op.pad(self, *args, **kwargs)
 
     def swapaxes(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`swapaxes`.
@@ -724,7 +731,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`swapaxes`, with
         this array as data.
         """
-        return swapaxes(self, *args, **kwargs)
+        return op.swapaxes(self, *args, **kwargs)
 
     def split(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`split`.
@@ -732,7 +739,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`split`, with
         this array as data.
         """
-        return split(self, *args, **kwargs)
+        return op.split(self, *args, **kwargs)
 
     def slice(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`slice`.
@@ -740,7 +747,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`slice`, with
         this array as data.
         """
-        return slice(self, *args, **kwargs)
+        return op.slice(self, *args, **kwargs)
 
     def slice_axis(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`slice_axis`.
@@ -748,7 +755,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`slice_axis`, with
         this array as data.
         """
-        return slice_axis(self, *args, **kwargs)
+        return op.slice_axis(self, *args, **kwargs)
 
     def take(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`take`.
@@ -756,7 +763,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`take`, with
         this array as data.
         """
-        return take(self, *args, **kwargs)
+        return op.take(self, *args, **kwargs)
 
     def one_hot(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`one_hot`.
@@ -764,7 +771,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`one_hot`, with
         this array as data.
         """
-        return one_hot(self, *args, **kwargs)
+        return op.one_hot(self, *args, **kwargs)
 
     def pick(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`pick`.
@@ -772,7 +779,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`pick`, with
         this array as data.
         """
-        return pick(self, *args, **kwargs)
+        return op.pick(self, *args, **kwargs)
 
     def sort(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sort`.
@@ -780,7 +787,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`sort`, with
         this array as data.
         """
-        return sort(self, *args, **kwargs)
+        return op.sort(self, *args, **kwargs)
 
     def topk(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`topk`.
@@ -788,7 +795,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`topk`, with
         this array as data.
         """
-        return topk(self, *args, **kwargs)
+        return op.topk(self, *args, **kwargs)
 
     def argsort(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argsort`.
@@ -796,7 +803,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`argsort`, with
         this array as data.
         """
-        return argsort(self, *args, **kwargs)
+        return op.argsort(self, *args, **kwargs)
 
     def argmax(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmax`.
@@ -804,7 +811,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`argmax`, with
         this array as data.
         """
-        return argmax(self, *args, **kwargs)
+        return op.argmax(self, *args, **kwargs)
 
     def argmin(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmin`.
@@ -812,7 +819,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`argmin`, with
         this array as data.
         """
-        return argmin(self, *args, **kwargs)
+        return op.argmin(self, *args, **kwargs)
 
     def clip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`clip`.
@@ -820,7 +827,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`clip`, with
         this array as data.
         """
-        return clip(self, *args, **kwargs)
+        return op.clip(self, *args, **kwargs)
 
     def abs(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`abs`.
@@ -828,7 +835,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`abs`, with
         this array as data.
         """
-        return abs(self, *args, **kwargs)
+        return op.abs(self, *args, **kwargs)
 
     def sign(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sign`.
@@ -836,7 +843,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`sign`, with
         this array as data.
         """
-        return sign(self, *args, **kwargs)
+        return op.sign(self, *args, **kwargs)
 
     def flatten(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flatten`.
@@ -844,7 +851,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`flatten`, with
         this array as data.
         """
-        return flatten(self, *args, **kwargs)
+        return op.flatten(self, *args, **kwargs)
 
     def expand_dims(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`expand_dims`.
@@ -852,7 +859,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`expand_dims`, with
         this array as data.
         """
-        return expand_dims(self, *args, **kwargs)
+        return op.expand_dims(self, *args, **kwargs)
 
     def tile(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`tile`.
@@ -860,7 +867,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`tile`, with
         this array as data.
         """
-        return tile(self, *args, **kwargs)
+        return op.tile(self, *args, **kwargs)
 
     def transpose(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`transpose`.
@@ -868,7 +875,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`transpose`, with
         this array as data.
         """
-        return transpose(self, *args, **kwargs)
+        return op.transpose(self, *args, **kwargs)
 
     def flip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flip`.
@@ -876,7 +883,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`flip`, with
         this array as data.
         """
-        return flip(self, *args, **kwargs)
+        return op.flip(self, *args, **kwargs)
 
     def sum(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sum`.
@@ -884,7 +891,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`sum`, with
         this array as data.
         """
-        return sum(self, *args, **kwargs)
+        return op.sum(self, *args, **kwargs)
 
     def nansum(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`nansum`.
@@ -892,7 +899,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`nansum`, with
         this array as data.
         """
-        return nansum(self, *args, **kwargs)
+        return op.nansum(self, *args, **kwargs)
 
     def prod(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`prod`.
@@ -900,7 +907,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`prod`, with
         this array as data.
         """
-        return prod(self, *args, **kwargs)
+        return op.prod(self, *args, **kwargs)
 
     def nanprod(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`nanprod`.
@@ -908,7 +915,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`nanprod`, with
         this array as data.
         """
-        return nanprod(self, *args, **kwargs)
+        return op.nanprod(self, *args, **kwargs)
 
     def mean(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`mean`.
@@ -916,7 +923,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`mean`, with
         this array as data.
         """
-        return mean(self, *args, **kwargs)
+        return op.mean(self, *args, **kwargs)
 
     def max(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`max`.
@@ -924,7 +931,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`max`, with
         this array as data.
         """
-        return max(self, *args, **kwargs)
+        return op.max(self, *args, **kwargs)
 
     def min(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`min`.
@@ -932,7 +939,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`min`, with
         this array as data.
         """
-        return min(self, *args, **kwargs)
+        return op.min(self, *args, **kwargs)
 
     def norm(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`norm`.
@@ -940,7 +947,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`norm`, with
         this array as data.
         """
-        return norm(self, *args, **kwargs)
+        return op.norm(self, *args, **kwargs)
 
     def round(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`round`.
@@ -948,7 +955,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`round`, with
         this array as data.
         """
-        return round(self, *args, **kwargs)
+        return op.round(self, *args, **kwargs)
 
     def rint(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`rint`.
@@ -956,7 +963,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`rint`, with
         this array as data.
         """
-        return rint(self, *args, **kwargs)
+        return op.rint(self, *args, **kwargs)
 
     def fix(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`fix`.
@@ -964,7 +971,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`fix`, with
         this array as data.
         """
-        return fix(self, *args, **kwargs)
+        return op.fix(self, *args, **kwargs)
 
     def floor(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`floor`.
@@ -972,7 +979,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`floor`, with
         this array as data.
         """
-        return floor(self, *args, **kwargs)
+        return op.floor(self, *args, **kwargs)
 
     def ceil(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`ceil`.
@@ -980,7 +987,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`ceil`, with
         this array as data.
         """
-        return ceil(self, *args, **kwargs)
+        return op.ceil(self, *args, **kwargs)
 
     def trunc(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`trunc`.
@@ -988,7 +995,7 @@ fixed-size items.
         The arguments are the same as for :py:func:`trunc`, with
         this array as data.
         """
-        return trunc(self, *args, **kwargs)
+        return op.trunc(self, *args, **kwargs)
 
     # pylint: disable= undefined-variable
     def broadcast_to(self, shape):
@@ -1038,9 +1045,9 @@ fixed-size items.
         if (cur_shape_arr[broadcasting_axes] != 1).any():
             raise ValueError(err_str)
         if cur_shape != self.shape:
-            return broadcast_to(self.reshape(cur_shape), shape=shape)
+            return op.broadcast_to(self.reshape(cur_shape), shape=shape)
         else:
-            return broadcast_to(self, shape=tuple(shape))
+            return op.broadcast_to(self, shape=tuple(shape))
     # pylint: enable= undefined-variable
 
     def wait_to_read(self):
@@ -1166,7 +1173,7 @@ fixed-size items.
     def stype(self):
         """Storage-type of the array.
         """
-        return _storage_type(self.handle)
+        return _STORAGE_TYPE_ID_TO_STR[_storage_type(self.handle)]
 
     @property
     # pylint: disable= invalid-name, undefined-variable
@@ -1193,7 +1200,7 @@ fixed-size items.
         """
         if len(self.shape) < 2:
             return self
-        return transpose(self)
+        return op.transpose(self)
     # pylint: enable= invalid-name, undefined-variable
 
     @property
@@ -1381,7 +1388,7 @@ fixed-size items.
             - 'add': gradient will be added to existing value on every backward.
             - 'null': do not compute gradient for this NDArray.
         """
-        grad = zeros_like(self)  # pylint: disable=undefined-variable
+        grad = op.zeros_like(self)  # pylint: disable=undefined-variable
         grad_req = _GRAD_REQ_MAP[grad_req]
         check_call(_LIB.MXAutogradMarkVariables(
             1, ctypes.pointer(self.handle),
@@ -1440,7 +1447,7 @@ fixed-size items.
         NDArray, CSRNDArray or RowSparseNDArray
             A copy of the array with the chosen storage stype
         """
-        return cast_storage(self, stype=stype)
+        return op.cast_storage(self, stype=stype)
 
 
 def onehot_encode(indices, out):
@@ -1595,7 +1602,7 @@ def moveaxis(tensor, source, destination):
     except IndexError:
         raise ValueError('Destination should verify 0 <= destination < tensor.ndim'
                          'Got %d' % destination)
-    return transpose(tensor, axes)
+    return op.transpose(tensor, axes)
 
 
 # pylint: disable= no-member, protected-access, too-many-arguments, redefined-outer-name
@@ -1751,7 +1758,7 @@ def add(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_add,
+        op.broadcast_add,
         operator.add,
         _internal._plus_scalar,
         None)
@@ -1813,7 +1820,7 @@ def subtract(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_sub,
+        op.broadcast_sub,
         operator.sub,
         _internal._minus_scalar,
         _internal._rminus_scalar)
@@ -1874,7 +1881,7 @@ def multiply(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_mul,
+        op.broadcast_mul,
         operator.mul,
         _internal._mul_scalar,
         None)
@@ -1931,7 +1938,7 @@ def divide(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_div,
+        op.broadcast_div,
         operator.truediv,
         _internal._div_scalar,
         _internal._rdiv_scalar)
@@ -1988,7 +1995,7 @@ def modulo(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_mod,
+        op.broadcast_mod,
         operator.mod,
         _internal._mod_scalar,
         _internal._rmod_scalar)
@@ -2050,7 +2057,7 @@ def power(base, exp):
     return _ufunc_helper(
         base,
         exp,
-        broadcast_power,
+        op.broadcast_power,
         operator.pow,
         _internal._power_scalar,
         _internal._rpower_scalar)
@@ -2107,7 +2114,7 @@ def maximum(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_maximum,
+        op.broadcast_maximum,
         lambda x, y: x if x > y else y,
         _internal._maximum_scalar,
         None)
@@ -2164,7 +2171,7 @@ def minimum(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_minimum,
+        op.broadcast_minimum,
         lambda x, y: x if x < y else y,
         _internal._minimum_scalar,
         None)
@@ -2228,7 +2235,7 @@ def equal(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_equal,
+        op.broadcast_equal,
         lambda x, y: 1 if x == y else 0,
         _internal._equal_scalar,
         None)
@@ -2295,7 +2302,7 @@ def not_equal(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_not_equal,
+        op.broadcast_not_equal,
         lambda x, y: 1 if x != y else 0,
         _internal._not_equal_scalar,
         None)
@@ -2359,7 +2366,7 @@ def greater(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_greater,
+        op.broadcast_greater,
         lambda x, y: 1 if x > y else 0,
         _internal._greater_scalar,
         _internal._lesser_scalar)
@@ -2423,7 +2430,7 @@ def greater_equal(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_greater_equal,
+        op.broadcast_greater_equal,
         lambda x, y: 1 if x >= y else 0,
         _internal._greater_equal_scalar,
         _internal._lesser_equal_scalar)
@@ -2487,7 +2494,7 @@ def lesser(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_lesser,
+        op.broadcast_lesser,
         lambda x, y: 1 if x < y else 0,
         _internal._lesser_scalar,
         _internal._greater_scalar)
@@ -2551,7 +2558,7 @@ def lesser_equal(lhs, rhs):
     return _ufunc_helper(
         lhs,
         rhs,
-        broadcast_lesser_equal,
+        op.broadcast_lesser_equal,
         lambda x, y: 1 if x <= y else 0,
         _internal._lesser_equal_scalar,
         _internal._greater_equal_scalar)
diff --git a/python/mxnet/ndarray/op.py b/python/mxnet/ndarray/op.py
index e4a1ab0..27ce09b 100644
--- a/python/mxnet/ndarray/op.py
+++ b/python/mxnet/ndarray/op.py
@@ -16,6 +16,7 @@
 # under the License.
 
 """Register backend ops in mxnet.ndarray namespace"""
+__all__ = ['CachedOp']
 
 import sys as _sys
 import os as _os
@@ -29,21 +30,21 @@ from ..ndarray_doc import _build_doc
 # pylint: disable=unused-import
 try:
     if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0:
-        from .._ctypes.ndarray import NDArrayBase, _STORAGE_TYPE_ID_TO_STR
+        from .._ctypes.ndarray import NDArrayBase
         from .._ctypes.ndarray import CachedOp, _imperative_invoke
     elif _sys.version_info >= (3, 0):
-        from .._cy3.ndarray import NDArrayBase, _imperative_invoke, _STORAGE_TYPE_ID_TO_STR
+        from .._cy3.ndarray import NDArrayBase, _imperative_invoke
         from .._cy3.ndarray import CachedOp, _imperative_invoke
     else:
-        from .._cy2.ndarray import NDArrayBase, _imperative_invoke, _STORAGE_TYPE_ID_TO_STR
+        from .._cy2.ndarray import NDArrayBase, _imperative_invoke
         from .._cy2.ndarray import CachedOp, _imperative_invoke
 except ImportError:
     if int(_os.environ.get("MXNET_ENFORCE_CYTHON", False)) != 0:
         raise ImportError("Cython Module cannot be loaded but MXNET_ENFORCE_CYTHON=1")
-    from .._ctypes.ndarray import NDArrayBase, _imperative_invoke, _STORAGE_TYPE_ID_TO_STR
+    from .._ctypes.ndarray import NDArrayBase, _imperative_invoke
     from .._ctypes.ndarray import CachedOp, _imperative_invoke
 
-from ..base import mx_uint, check_call, _LIB, py_str, OpHandle, c_str, _Null
+from ..base import mx_uint, check_call, _LIB, py_str, _init_op_module, _Null
 # pylint: enable=unused-import
 
 
@@ -170,40 +171,4 @@ def %s(%s):
     return ndarray_function
 
 
-# pylint: enable=too-many-locals, invalid-name
-def _init_ndarray_module(root_namespace):
-    """List and add all the ndarray functions to current module."""
-    plist = ctypes.POINTER(ctypes.c_char_p)()
-    size = ctypes.c_uint()
-
-    check_call(_LIB.MXListAllOpNames(ctypes.byref(size),
-                                     ctypes.byref(plist)))
-    op_names = []
-    for i in range(size.value):
-        op_names.append(py_str(plist[i]))
-
-    module_obj = _sys.modules["%s.ndarray" % root_namespace]
-    module_sparse = _sys.modules["%s.ndarray.sparse" % root_namespace]
-    module_internal = _sys.modules["%s.ndarray._internal" % root_namespace]
-    module_contrib = _sys.modules["%s.contrib.ndarray" % root_namespace]
-    for name in op_names:
-        hdl = OpHandle()
-        check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
-        function = _make_ndarray_function(hdl, name)
-        if function.__name__.startswith('_contrib_'):
-            function.__name__ = function.__name__[9:]
-            function.__module__ = 'mxnet.contrib.ndarray'
-            setattr(module_contrib, function.__name__, function)
-        elif function.__name__.startswith('_'):
-            setattr(module_internal, function.__name__, function)
-        else:
-            setattr(module_obj, function.__name__, function)
-
-        # register sparse ops under mxnet.ndarray.sparse
-        if function.__name__.startswith('_sparse_'):
-            function.__name__ = function.__name__[8:]
-            function.__module__ = 'mxnet.ndarray.sparse'
-            setattr(module_sparse, function.__name__, function)
-
-# register backend operators in mx.nd
-_init_ndarray_module("mxnet")
+_init_op_module('mxnet', 'ndarray', _make_ndarray_function)
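
The removed `_init_ndarray_module` loop dispatched each generated function by op-name prefix (`_contrib_`, `_sparse_`, leading underscore). The shared `_init_op_module` helper that replaces it lives in `python/mxnet/base.py` and presumably generalizes that dispatch to the new `contrib`, `linalg`, `random`, and `sparse` submodules. Below is a minimal sketch of the idea, not the actual base.py code; the prefix table and the `_list_all_op_names`/`_get_op_handle` helpers are hypothetical stand-ins for the `MXListAllOpNames`/`NNGetOpHandle` ctypes calls shown in the removed loop above:

    import sys as _sys

    # Hypothetical sketch of the prefix-based dispatch; names and details are assumptions.
    _OP_SUBMODULE_PREFIXES = {'_contrib_': 'contrib', '_linalg_': 'linalg',
                              '_random_': 'random', '_sparse_': 'sparse'}

    def _init_op_module(root_namespace, module_name, make_op_func):
        """Register backend ops under e.g. mxnet.ndarray and mxnet.ndarray.<submodule>."""
        module_op = _sys.modules['%s.%s' % (root_namespace, module_name)]
        module_internal = _sys.modules['%s.%s._internal' % (root_namespace, module_name)]
        for name in _list_all_op_names():                        # wraps MXListAllOpNames
            function = make_op_func(_get_op_handle(name), name)  # wraps NNGetOpHandle
            for prefix, submodule in _OP_SUBMODULE_PREFIXES.items():
                if name.startswith(prefix):
                    # e.g. '_random_uniform' is exposed as mxnet.ndarray.random.uniform
                    function.__name__ = name[len(prefix):]
                    target = _sys.modules['%s.%s.%s' % (root_namespace, module_name, submodule)]
                    break
            else:
                target = module_internal if name.startswith('_') else module_op
            setattr(target, function.__name__, function)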
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/ndarray/random.py
similarity index 91%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/ndarray/random.py
index 1d94f2b..0ec4578 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/ndarray/random.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Random distribution generator NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py
index 97e43f5..806398e 100644
--- a/python/mxnet/ndarray/sparse.py
+++ b/python/mxnet/ndarray/sparse.py
@@ -31,6 +31,9 @@ import warnings
 import os as _os
 import sys as _sys
 
+__all__ = ["_ndarray_cls", "csr_matrix", "row_sparse_array",
+           "BaseSparseNDArray", "CSRNDArray", "RowSparseNDArray"]
+
 # import operator
 import numpy as np
 from ..base import NotSupportedForSparseNDArray
@@ -41,11 +44,12 @@ from ..context import Context
 from . import _internal
 from .ndarray import _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP
 from .ndarray import _STORAGE_TYPE_STR_TO_ID
+from .ndarray import _STORAGE_TYPE_UNDEFINED, _STORAGE_TYPE_DEFAULT
+from .ndarray import _STORAGE_TYPE_ROW_SPARSE, _STORAGE_TYPE_CSR
 from .ndarray import NDArray, _storage_type
 from .ndarray import zeros as _zeros_ndarray
 from .ndarray import array as _array
-from . import cast_storage
-from . import slice as nd_slice
+from . import op
 
 # Use different version of SymbolBase
 # When possible, use cython to speedup part of computation.
@@ -64,10 +68,6 @@ except ImportError:
 # pylint: enable=unused-import
 
 
-__all__ = ["_ndarray_cls", "csr_matrix", "row_sparse_array",
-           "BaseSparseNDArray", "CSRNDArray", "RowSparseNDArray"]
-
-
 _STORAGE_AUX_TYPES = {
     'row_sparse': [np.int64],
     'csr': [np.int64, np.int64]
@@ -300,7 +300,7 @@ class CSRNDArray(BaseSparseNDArray):
         >>> indptr = np.array([0, 2, 3, 6])
         >>> indices = np.array([0, 2, 2, 0, 1, 2])
         >>> data = np.array([1, 2, 3, 4, 5, 6])
-        >>> a = mx.nd.csr_matrix(data, indptr, indices, (3, 3))
+        >>> a = mx.nd.sparse.csr_matrix(data, indptr, indices, (3, 3))
         >>> a.asnumpy()
         array([[1, 0, 2],
                [0, 0, 3],
@@ -316,7 +316,7 @@ class CSRNDArray(BaseSparseNDArray):
             if key.start is not None or key.stop is not None:
                 begin = key.start if key.start else 0
                 end = key.stop if key.stop else self.shape[0]
-                return nd_slice(self, begin=begin, end=end)
+                return op.slice(self, begin=begin, end=end)
             else:
                 return self
         if isinstance(key, tuple):
@@ -427,7 +427,7 @@ class CSRNDArray(BaseSparseNDArray):
         """
         if stype == 'row_sparse':
             raise ValueError("cast_storage from csr to row_sparse is not supported")
-        return cast_storage(self, stype=stype)
+        return op.cast_storage(self, stype=stype)
 
     def copyto(self, other):
         """Copies the value of this array to another array.
@@ -640,7 +640,7 @@ class RowSparseNDArray(BaseSparseNDArray):
         """
         if stype == 'csr':
             raise ValueError("cast_storage from row_sparse to csr is not supported")
-        return cast_storage(self, stype=stype)
+        return op.cast_storage(self, stype=stype)
 
     def copyto(self, other):
         """Copies the value of this array to another array.
@@ -725,7 +725,7 @@ def csr_matrix(data, indptr, indices, shape, ctx=None, dtype=None, indptr_type=N
     Example
     -------
     >>> import mxnet as mx
-    >>> a = mx.nd.csr_matrix([1, 2, 3], [0, 1, 2, 2, 3], [1, 0, 2], (4, 3))
+    >>> a = mx.nd.sparse.csr_matrix([1, 2, 3], [0, 1, 2, 2, 3], [1, 0, 2], (4, 3))
     >>> a.asnumpy()
     array([[ 0.,  1.,  0.],
            [ 2.,  0.,  0.],
@@ -794,7 +794,7 @@ def row_sparse_array(data, indices, shape, ctx=None, dtype=None, indices_type=No
 
     Example
     -------
-    >>> a = mx.nd.row_sparse_array([[1, 2], [3, 4]], [1, 4], (6, 2))
+    >>> a = mx.nd.sparse.row_sparse_array([[1, 2], [3, 4]], [1, 4], (6, 2))
     >>> a.asnumpy()
     array([[ 0.,  0.],
            [ 1.,  2.],
@@ -831,14 +831,14 @@ def row_sparse_array(data, indices, shape, ctx=None, dtype=None, indices_type=No
     return result
 
 
-def _ndarray_cls(handle, writable=True, stype=None):
-    if stype is None:
+def _ndarray_cls(handle, writable=True, stype=_STORAGE_TYPE_UNDEFINED):
+    if stype == _STORAGE_TYPE_UNDEFINED:
         stype = _storage_type(handle)
-    if stype == 'default':
+    if stype == _STORAGE_TYPE_DEFAULT:
         return NDArray(handle, writable=writable)
-    elif stype == 'csr':
+    elif stype == _STORAGE_TYPE_CSR:
         return CSRNDArray(handle, writable=writable)
-    elif stype == 'row_sparse':
+    elif stype == _STORAGE_TYPE_ROW_SPARSE:
         return RowSparseNDArray(handle, writable=writable)
     else:
         raise Exception("unknown storage type")
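
`_ndarray_cls` now switches on the raw integer storage-type id returned by `_storage_type(handle)` instead of converting it to a string first; the public `stype` property changed earlier in this patch maps the id back to its name for users. A sketch of the constants this implies in `ndarray.py`; the numeric values mirror the backend storage-type enum ordering and are assumptions here, not copied from the patch:

    # Assumed definitions (illustrative values following the backend enum ordering).
    _STORAGE_TYPE_UNDEFINED = -1
    _STORAGE_TYPE_DEFAULT = 0
    _STORAGE_TYPE_ROW_SPARSE = 1
    _STORAGE_TYPE_CSR = 2

    _STORAGE_TYPE_ID_TO_STR = {
        _STORAGE_TYPE_UNDEFINED: 'undefined',
        _STORAGE_TYPE_DEFAULT: 'default',
        _STORAGE_TYPE_ROW_SPARSE: 'row_sparse',
        _STORAGE_TYPE_CSR: 'csr',
    }

Internal callers can then compare plain ints, while user-facing code keeps seeing the familiar 'default'/'row_sparse'/'csr' strings.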
@@ -910,13 +910,13 @@ def array(source_array, ctx=None, dtype=None, aux_types=None):
     """Creates a sparse array from any object exposing the array interface.
     """
     if isinstance(source_array, NDArray):
-        assert(source_array.stype != 'default'), \
-               "Please use `cast_storage` to create BaseSparseNDArray from an NDArray"
+        assert(source_array.stype != 'default'),\
+            "Please use `cast_storage` to create BaseSparseNDArray from an NDArray"
         dtype = source_array.dtype if dtype is None else dtype
         aux_types = source_array._aux_types if aux_types is None else aux_types
     else:
         # TODO(haibin/anisub) support creation from scipy object when `_sync_copy_from` is ready
-        raise NotImplementedError('creating BaseSparseNDArray from ' \
+        raise NotImplementedError('creating BaseSparseNDArray from '
                                   ' a non-NDArray object is not implemented.')
     arr = empty(source_array.stype, source_array.shape, ctx, dtype, aux_types)
     arr[:] = source_array
diff --git a/python/mxnet/random.py b/python/mxnet/random.py
index 14bfc27..5754304 100644
--- a/python/mxnet/random.py
+++ b/python/mxnet/random.py
@@ -48,19 +48,19 @@ def seed(seed_state):
 
     Example
     -------
-    >>> print(mx.nd.random_normal(shape=(2,2)).asnumpy())
+    >>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
     [[ 1.36481571 -0.62203991]
      [-1.4962182  -0.08511394]]
-    >>> print(mx.nd.random_normal(shape=(2,2)).asnumpy())
+    >>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
     [[ 1.09544981 -0.20014545]
      [-0.20808885  0.2527658 ]]
     >>>
     >>> mx.random.seed(128)
-    >>> print(mx.nd.random_normal(shape=(2,2)).asnumpy())
+    >>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
     [[ 0.47400656 -0.75213492]
      [ 0.20251541  0.95352972]]
     >>> mx.random.seed(128)
-    >>> print(mx.nd.random_normal(shape=(2,2)).asnumpy())
+    >>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
     [[ 0.47400656 -0.75213492]
      [ 0.20251541  0.95352972]]
     """
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py
index d93a230..2694b4e 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/symbol/__init__.py
@@ -17,7 +17,10 @@
 
 """Symbol API of MXNet."""
 
-from . import _internal, sparse, op
+from . import _internal, contrib, linalg, random, sparse
 # pylint: disable=wildcard-import, redefined-builtin
+from .op import *
 from .symbol import *
-from ..ndarray import _GRAD_REQ_MAP
+# pylint: enable=wildcard-import
+
+__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse']
diff --git a/python/mxnet/symbol/_internal.py b/python/mxnet/symbol/_internal.py
index cd6ae41..25b3b60 100644
--- a/python/mxnet/symbol/_internal.py
+++ b/python/mxnet/symbol/_internal.py
@@ -16,3 +16,4 @@
 # under the License.
 
 """Symbol namespace used to register internal functions."""
+__all__ = []
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/symbol/contrib.py
similarity index 94%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/symbol/contrib.py
index 1d94f2b..f21d144 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/symbol/contrib.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Contrib NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/symbol/linalg.py
similarity index 93%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/symbol/linalg.py
index 1d94f2b..0c8e7fd 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/symbol/linalg.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Linear Algebra NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/symbol/op.py b/python/mxnet/symbol/op.py
index 82884a5..9353a73 100644
--- a/python/mxnet/symbol/op.py
+++ b/python/mxnet/symbol/op.py
@@ -16,14 +16,15 @@
 # under the License.
 
 """Register backend ops in mxnet.symbol namespace."""
+__all__ = []
 
 import sys as _sys
 import os as _os
 import ctypes
 import numpy as _numpy  # pylint: disable=unused-import
 
-from mxnet.base import mx_uint, check_call, _LIB, py_str, OpHandle, c_str
-from mxnet.symbol_doc import _build_doc
+from ..base import mx_uint, check_call, _LIB, py_str
+from ..symbol_doc import _build_doc
 
 # Use different version of SymbolBase
 # When possible, use cython to speedup part of computation.
@@ -44,7 +45,7 @@ except ImportError:
     from .._ctypes.symbol import SymbolBase, _set_symbol_class
     from .._ctypes.symbol import _symbol_creator
 
-from ..base import _Null
+from ..base import _Null, _init_op_module
 from ..name import NameManager
 from ..attribute import AttrScope
 # pylint: enable=unused-import
@@ -203,40 +204,4 @@ def %s(%s):
     return symbol_function
 
 
-def _init_symbol_module(root_namespace):
-    """List and add all the atomic symbol functions to current module."""
-    plist = ctypes.POINTER(ctypes.c_char_p)()
-    size = ctypes.c_uint()
-
-    check_call(_LIB.MXListAllOpNames(ctypes.byref(size),
-                                     ctypes.byref(plist)))
-    op_names = []
-    for i in range(size.value):
-        op_names.append(py_str(plist[i]))
-
-    module_obj = _sys.modules["%s.symbol" % root_namespace]
-    module_sparse = _sys.modules["%s.symbol.sparse" % root_namespace]
-    module_internal = _sys.modules["%s.symbol._internal" % root_namespace]
-    module_contrib = _sys.modules["%s.contrib.symbol" % root_namespace]
-    for name in op_names:
-        hdl = OpHandle()
-        check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
-        function = _make_atomic_symbol_function(hdl, name)
-        if function.__name__.startswith('_contrib_'):
-            function.__name__ = function.__name__[9:]
-            function.__module__ = 'mxnet.contrib.symbol'
-            setattr(module_contrib, function.__name__, function)
-        elif function.__name__.startswith('_'):
-            setattr(module_internal, function.__name__, function)
-        else:
-            setattr(module_obj, function.__name__, function)
-
-        # register sparse ops under mxnet.symbol.sparse
-        if function.__name__.startswith('_sparse_'):
-            function.__name__ = function.__name__[8:]
-            function.__module__ = 'mxnet.symbol.sparse'
-            setattr(module_sparse, function.__name__, function)
-
-
-# Initialize the atomic symbol in startups
-_init_symbol_module("mxnet")
+_init_op_module('mxnet', 'symbol', _make_atomic_symbol_function)
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/symbol/random.py
similarity index 91%
copy from python/mxnet/symbol/sparse.py
copy to python/mxnet/symbol/random.py
index 1d94f2b..0ec4578 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/symbol/random.py
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Sparse Symbol API of MXNet."""
+"""Random distribution generator NDArray API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/symbol/sparse.py b/python/mxnet/symbol/sparse.py
index 1d94f2b..3bfcf4a 100644
--- a/python/mxnet/symbol/sparse.py
+++ b/python/mxnet/symbol/sparse.py
@@ -16,3 +16,4 @@
 # under the License.
 
 """Sparse Symbol API of MXNet."""
+__all__ = []
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 0038840..cc128b2 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -40,13 +40,13 @@ from ..ndarray import NDArray, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP, _GRAD_REQ_MAP
 from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
 from ..ndarray import _ndarray_cls
 from ..executor import Executor
-from . import _internal, reshape, transpose, zeros_like, ones_like, broadcast_axes, broadcast_to
-from . import flatten, norm, rint, fix, floor, ceil, split, slice_axis, one_hot, pick, take
-from . import trunc, expand_dims, flip, tile, repeat, pad, clip, sign
-from . import nansum, prod, nanprod, mean, sort, topk, argsort, argmax, argmin
-from . import sum, round, max, min, slice, abs # pylint: disable=redefined-builtin
+from . import _internal
+from . import op
 from .op import SymbolBase, _set_symbol_class, AttrScope, _Null  # pylint: disable=unused-import
 
+__all__ = ["Symbol", "var", "Variable", "Group", "load", "load_json",
+           "pow", "maximum", "minimum", "hypot", "zeros", "ones", "full", "arange"]
+
 
 class Symbol(SymbolBase):
     """Symbol is symbolic graph of the mxnet."""
@@ -1497,7 +1497,7 @@ class Symbol(SymbolBase):
                 shared_buffer[k] = v
 
         # create in_args, arg_grads, and aux_states for the current executor
-        arg_arrays = [_ndarray_cls(NDArrayHandle(in_arg_handles[i])) \
+        arg_arrays = [_ndarray_cls(NDArrayHandle(in_arg_handles[i]))
                       for i in range(num_in_args.value)]
         grad_arrays = [_ndarray_cls(NDArrayHandle(arg_grad_handles[i]))
                        if arg_grad_handles[i] is not None
@@ -1742,7 +1742,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`reshape`, with
         this array as data.
         """
-        return reshape(self, *args, **kwargs)
+        return op.reshape(self, *args, **kwargs)
 
     def astype(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`cast`.
@@ -1750,7 +1750,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`cast`, with
         this array as data.
         """
-        return cast(self, *args, **kwargs)
+        return op.cast(self, *args, **kwargs)
 
     def zeros_like(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`zeros_like`.
@@ -1758,7 +1758,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`zeros_like`, with
         this array as data.
         """
-        return zeros_like(self, *args, **kwargs)
+        return op.zeros_like(self, *args, **kwargs)
 
     def ones_like(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`ones_like`.
@@ -1766,7 +1766,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`ones_like`, with
         this array as data.
         """
-        return ones_like(self, *args, **kwargs)
+        return op.ones_like(self, *args, **kwargs)
 
     def broadcast_axes(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`broadcast_axes`.
@@ -1774,7 +1774,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`broadcast_axes`, with
         this array as data.
         """
-        return broadcast_axes(self, *args, **kwargs)
+        return op.broadcast_axes(self, *args, **kwargs)
 
     def repeat(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`repeat`.
@@ -1782,7 +1782,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`repeat`, with
         this array as data.
         """
-        return repeat(self, *args, **kwargs)
+        return op.repeat(self, *args, **kwargs)
 
     def pad(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`pad`.
@@ -1790,7 +1790,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`pad`, with
         this array as data.
         """
-        return pad(self, *args, **kwargs)
+        return op.pad(self, *args, **kwargs)
 
     def swapaxes(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`swapaxes`.
@@ -1798,7 +1798,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`swapaxes`, with
         this array as data.
         """
-        return swapaxes(self, *args, **kwargs)
+        return op.swapaxes(self, *args, **kwargs)
 
     def split(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`split`.
@@ -1806,7 +1806,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`split`, with
         this array as data.
         """
-        return split(self, *args, **kwargs)
+        return op.split(self, *args, **kwargs)
 
     def slice(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`slice`.
@@ -1814,7 +1814,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`slice`, with
         this array as data.
         """
-        return slice(self, *args, **kwargs)
+        return op.slice(self, *args, **kwargs)
 
     def slice_axis(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`slice_axis`.
@@ -1822,7 +1822,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`slice_axis`, with
         this array as data.
         """
-        return slice_axis(self, *args, **kwargs)
+        return op.slice_axis(self, *args, **kwargs)
 
     def take(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`take`.
@@ -1830,7 +1830,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`take`, with
         this array as data.
         """
-        return take(self, *args, **kwargs)
+        return op.take(self, *args, **kwargs)
 
     def one_hot(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`one_hot`.
@@ -1838,7 +1838,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`one_hot`, with
         this array as data.
         """
-        return one_hot(self, *args, **kwargs)
+        return op.one_hot(self, *args, **kwargs)
 
     def pick(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`pick`.
@@ -1846,7 +1846,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`pick`, with
         this array as data.
         """
-        return pick(self, *args, **kwargs)
+        return op.pick(self, *args, **kwargs)
 
     def sort(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sort`.
@@ -1854,7 +1854,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`sort`, with
         this array as data.
         """
-        return sort(self, *args, **kwargs)
+        return op.sort(self, *args, **kwargs)
 
     def topk(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`topk`.
@@ -1862,7 +1862,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`topk`, with
         this array as data.
         """
-        return topk(self, *args, **kwargs)
+        return op.topk(self, *args, **kwargs)
 
     def argsort(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argsort`.
@@ -1870,7 +1870,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`argsort`, with
         this array as data.
         """
-        return argsort(self, *args, **kwargs)
+        return op.argsort(self, *args, **kwargs)
 
     def argmax(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmax`.
@@ -1878,7 +1878,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`argmax`, with
         this array as data.
         """
-        return argmax(self, *args, **kwargs)
+        return op.argmax(self, *args, **kwargs)
 
     def argmin(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`argmin`.
@@ -1886,7 +1886,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`argmin`, with
         this array as data.
         """
-        return argmin(self, *args, **kwargs)
+        return op.argmin(self, *args, **kwargs)
 
     def clip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`clip`.
@@ -1894,7 +1894,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`clip`, with
         this array as data.
         """
-        return clip(self, *args, **kwargs)
+        return op.clip(self, *args, **kwargs)
 
     def abs(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`abs`.
@@ -1902,7 +1902,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`abs`, with
         this array as data.
         """
-        return abs(self, *args, **kwargs)
+        return op.abs(self, *args, **kwargs)
 
     def sign(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sign`.
@@ -1910,7 +1910,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`sign`, with
         this array as data.
         """
-        return sign(self, *args, **kwargs)
+        return op.sign(self, *args, **kwargs)
 
     def flatten(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flatten`.
@@ -1918,7 +1918,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`flatten`, with
         this array as data.
         """
-        return flatten(self, *args, **kwargs)
+        return op.flatten(self, *args, **kwargs)
 
     def expand_dims(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`expand_dims`.
@@ -1926,7 +1926,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`expand_dims`, with
         this array as data.
         """
-        return expand_dims(self, *args, **kwargs)
+        return op.expand_dims(self, *args, **kwargs)
 
     def broadcast_to(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`broadcast_to`.
@@ -1934,7 +1934,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`broadcast_to`, with
         this array as data.
         """
-        return broadcast_to(self, *args, **kwargs)
+        return op.broadcast_to(self, *args, **kwargs)
 
     def tile(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`tile`.
@@ -1942,7 +1942,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`tile`, with
         this array as data.
         """
-        return tile(self, *args, **kwargs)
+        return op.tile(self, *args, **kwargs)
 
     def transpose(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`transpose`.
@@ -1950,7 +1950,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`transpose`, with
         this array as data.
         """
-        return transpose(self, *args, **kwargs)
+        return op.transpose(self, *args, **kwargs)
 
     def flip(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`flip`.
@@ -1958,7 +1958,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`flip`, with
         this array as data.
         """
-        return flip(self, *args, **kwargs)
+        return op.flip(self, *args, **kwargs)
 
     def sum(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`sum`.
@@ -1966,7 +1966,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`sum`, with
         this array as data.
         """
-        return sum(self, *args, **kwargs)
+        return op.sum(self, *args, **kwargs)
 
     def nansum(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`nansum`.
@@ -1974,7 +1974,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`nansum`, with
         this array as data.
         """
-        return nansum(self, *args, **kwargs)
+        return op.nansum(self, *args, **kwargs)
 
     def prod(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`prod`.
@@ -1982,7 +1982,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`prod`, with
         this array as data.
         """
-        return prod(self, *args, **kwargs)
+        return op.prod(self, *args, **kwargs)
 
     def nanprod(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`nanprod`.
@@ -1990,7 +1990,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`nanprod`, with
         this array as data.
         """
-        return nanprod(self, *args, **kwargs)
+        return op.nanprod(self, *args, **kwargs)
 
     def mean(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`mean`.
@@ -1998,7 +1998,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`mean`, with
         this array as data.
         """
-        return mean(self, *args, **kwargs)
+        return op.mean(self, *args, **kwargs)
 
     def max(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`max`.
@@ -2006,7 +2006,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`max`, with
         this array as data.
         """
-        return max(self, *args, **kwargs)
+        return op.max(self, *args, **kwargs)
 
     def min(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`min`.
@@ -2014,7 +2014,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`min`, with
         this array as data.
         """
-        return min(self, *args, **kwargs)
+        return op.min(self, *args, **kwargs)
 
     def norm(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`norm`.
@@ -2022,7 +2022,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`norm`, with
         this array as data.
         """
-        return norm(self, *args, **kwargs)
+        return op.norm(self, *args, **kwargs)
 
     def round(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`round`.
@@ -2030,7 +2030,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`round`, with
         this array as data.
         """
-        return round(self, *args, **kwargs)
+        return op.round(self, *args, **kwargs)
 
     def rint(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`rint`.
@@ -2038,7 +2038,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`rint`, with
         this array as data.
         """
-        return rint(self, *args, **kwargs)
+        return op.rint(self, *args, **kwargs)
 
     def fix(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`fix`.
@@ -2046,7 +2046,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`fix`, with
         this array as data.
         """
-        return fix(self, *args, **kwargs)
+        return op.fix(self, *args, **kwargs)
 
     def floor(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`floor`.
@@ -2054,7 +2054,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`floor`, with
         this array as data.
         """
-        return floor(self, *args, **kwargs)
+        return op.floor(self, *args, **kwargs)
 
     def ceil(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`ceil`.
@@ -2062,7 +2062,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`ceil`, with
         this array as data.
         """
-        return ceil(self, *args, **kwargs)
+        return op.ceil(self, *args, **kwargs)
 
     def trunc(self, *args, **kwargs):
         """Convenience fluent method for :py:func:`trunc`.
@@ -2070,7 +2070,7 @@ class Symbol(SymbolBase):
         The arguments are the same as for :py:func:`trunc`, with
         this array as data.
         """
-        return trunc(self, *args, **kwargs)
+        return op.trunc(self, *args, **kwargs)
 
     def wait_to_read(self):
         raise NotImplementedForSymbol(self.wait_to_read, None)
diff --git a/src/operator/random/sample_op.cc b/src/operator/random/sample_op.cc
index 363163c..ea6fdd5 100644
--- a/src/operator/random/sample_op.cc
+++ b/src/operator/random/sample_op.cc
@@ -45,9 +45,10 @@ DMLC_REGISTER_PARAMETER(SampleGenNegBinomialParam);
   .add_arguments(ParamType::__FIELDS__())
 
 // Add "uniform" alias for backward compatibility
-MXNET_OPERATOR_REGISTER_SAMPLE(random_uniform, SampleUniformParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_uniform, SampleUniformParam)
 .add_alias("uniform")
 .add_alias("_sample_uniform")
+.add_alias("random_uniform")
 .describe(R"code(Draw random samples from a uniform distribution.
 
 .. note:: The existing alias ``uniform`` is deprecated.
@@ -57,17 +58,18 @@ Samples are uniformly distributed over the half-open interval *[low, high)*
 
 Example::
 
-   random_uniform(low=0, high=1, shape=(2,2)) = [[ 0.60276335,  0.85794562],
-                                                 [ 0.54488319,  0.84725171]]
+   uniform(low=0, high=1, shape=(2,2)) = [[ 0.60276335,  0.85794562],
+                                          [ 0.54488319,  0.84725171]]
 
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleUniform_<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SampleUniformEx_<cpu>);
 
 // Add "normal" alias for backward compatibility
-MXNET_OPERATOR_REGISTER_SAMPLE(random_normal, SampleNormalParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_normal, SampleNormalParam)
 .add_alias("normal")
 .add_alias("_sample_normal")
+.add_alias("random_normal")
 .describe(R"code(Draw random samples from a normal (Gaussian) distribution.
 
 .. note:: The existing alias ``normal`` is deprecated.
@@ -76,41 +78,44 @@ Samples are distributed according to a normal distribution parametrized by *loc*
 
 Example::
 
-   random_normal(loc=0, scale=1, shape=(2,2)) = [[ 1.89171135, -1.16881478],
-                                                 [-1.23474145,  1.55807114]]
+   normal(loc=0, scale=1, shape=(2,2)) = [[ 1.89171135, -1.16881478],
+                                          [-1.23474145,  1.55807114]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleNormal_<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SampleNormalEx_<cpu>);
 
-MXNET_OPERATOR_REGISTER_SAMPLE(random_gamma, SampleGammaParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_gamma, SampleGammaParam)
 .add_alias("_sample_gamma")
+.add_alias("random_gamma")
 .describe(R"code(Draw random samples from a gamma distribution.
 
 Samples are distributed according to a gamma distribution parametrized by *alpha* (shape) and *beta* (scale).
 
 Example::
 
-   random_gamma(alpha=9, beta=0.5, shape=(2,2)) = [[ 7.10486984,  3.37695289],
-                                                   [ 3.91697288,  3.65933681]]
+   gamma(alpha=9, beta=0.5, shape=(2,2)) = [[ 7.10486984,  3.37695289],
+                                            [ 3.91697288,  3.65933681]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleGamma_<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SampleGammaEx_<cpu>);
 
-MXNET_OPERATOR_REGISTER_SAMPLE(random_exponential, SampleExponentialParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_exponential, SampleExponentialParam)
 .add_alias("_sample_exponential")
+.add_alias("random_exponential")
 .describe(R"code(Draw random samples from an exponential distribution.
 
 Samples are distributed according to an exponential distribution parametrized by *lambda* (rate).
 
 Example::
 
-   random_exponential(lam=4, shape=(2,2)) = [[ 0.0097189 ,  0.08999364],
-                                             [ 0.04146638,  0.31715935]]
+   exponential(lam=4, shape=(2,2)) = [[ 0.0097189 ,  0.08999364],
+                                      [ 0.04146638,  0.31715935]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleExponential_<cpu>);
 
-MXNET_OPERATOR_REGISTER_SAMPLE(random_poisson, SamplePoissonParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_poisson, SamplePoissonParam)
 .add_alias("_sample_poisson")
+.add_alias("random_poisson")
 .describe(R"code(Draw random samples from a Poisson distribution.
 
 Samples are distributed according to a Poisson distribution parametrized by *lambda* (rate).
@@ -118,13 +123,14 @@ Samples will always be returned as a floating point data type.
 
 Example::
 
-   random_poisson(lam=4, shape=(2,2)) = [[ 5.,  2.],
-                                         [ 4.,  6.]]
+   poisson(lam=4, shape=(2,2)) = [[ 5.,  2.],
+                                  [ 4.,  6.]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SamplePoisson_<cpu>);
 
-MXNET_OPERATOR_REGISTER_SAMPLE(random_negative_binomial, SampleNegBinomialParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_negative_binomial, SampleNegBinomialParam)
 .add_alias("_sample_negbinomial")
+.add_alias("random_negative_binomial")
 .describe(R"code(Draw random samples from a negative binomial distribution.
 
 Samples are distributed according to a negative binomial distribution parametrized by
@@ -133,13 +139,14 @@ Samples will always be returned as a floating point data type.
 
 Example::
 
-   random_negative_binomial(k=3, p=0.4, shape=(2,2)) = [[ 4.,  7.],
-                                                        [ 2.,  5.]]
+   negative_binomial(k=3, p=0.4, shape=(2,2)) = [[ 4.,  7.],
+                                                 [ 2.,  5.]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleNegBinomial_<cpu>);
 
-MXNET_OPERATOR_REGISTER_SAMPLE(random_generalized_negative_binomial, SampleGenNegBinomialParam)
+MXNET_OPERATOR_REGISTER_SAMPLE(_random_generalized_negative_binomial, SampleGenNegBinomialParam)
 .add_alias("_sample_gennegbinomial")
+.add_alias("random_generalized_negative_binomial")
 .describe(R"code(Draw random samples from a generalized negative binomial distribution.
 
 Samples are distributed according to a generalized negative binomial distribution parametrized by
@@ -149,8 +156,8 @@ Samples will always be returned as a floating point data type.
 
 Example::
 
-   random_generalized_negative_binomial(mu=2.0, alpha=0.3, shape=(2,2)) = [[ 2.,  1.],
-                                                                           [ 6.,  4.]]
+   generalized_negative_binomial(mu=2.0, alpha=0.3, shape=(2,2)) = [[ 2.,  1.],
+                                                                    [ 6.,  4.]]
 )code" ADD_FILELINE)
 .set_attr<FCompute>("FCompute<cpu>", SampleGenNegBinomial_<cpu>);
 
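With the registrations above, each sampling op gets an underscore-prefixed canonical name plus aliases, so the new namespaced functions and the legacy flat names should both keep resolving; a quick illustrative check (sampled values are random, so only shapes are compared):

    import mxnet as mx

    mx.random.seed(128)
    a = mx.nd.random.uniform(low=0, high=1, shape=(2, 2))   # new mx.nd.random namespace
    b = mx.nd.random_uniform(low=0, high=1, shape=(2, 2))   # legacy name kept via add_alias
    assert a.shape == b.shape == (2, 2)
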
diff --git a/src/operator/random/sample_op.cu b/src/operator/random/sample_op.cu
index 7bdb9fa..a26413d 100644
--- a/src/operator/random/sample_op.cu
+++ b/src/operator/random/sample_op.cu
@@ -103,11 +103,11 @@ void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
   SampleNormalDnsImpl<gpu>(attrs, ctx, req[0], &out);
 }
 
-NNVM_REGISTER_OP(random_uniform)
+NNVM_REGISTER_OP(_random_uniform)
 .set_attr<FCompute>("FCompute<gpu>", SampleUniform_<gpu>)
 .set_attr<FComputeEx>("FComputeEx<gpu>", SampleUniformEx_<gpu>);
 
-NNVM_REGISTER_OP(random_normal)
+NNVM_REGISTER_OP(_random_normal)
 .set_attr<FCompute>("FCompute<gpu>", SampleNormal_<gpu>)
 .set_attr<FComputeEx>("FComputeEx<gpu>", SampleNormalEx_<gpu>);
 
diff --git a/src/operator/tensor/la_op.cc b/src/operator/tensor/la_op.cc
index 70d4f9b..9b94603 100644
--- a/src/operator/tensor/la_op.cc
+++ b/src/operator/tensor/la_op.cc
@@ -31,7 +31,8 @@ DMLC_REGISTER_PARAMETER(LaMatrixMacParam);
 DMLC_REGISTER_PARAMETER(LaMatrixMultParam);
 DMLC_REGISTER_PARAMETER(LaTriangMatrixMultParam);
 
-NNVM_REGISTER_OP(linalg_gemm)
+NNVM_REGISTER_OP(_linalg_gemm)
+.add_alias("linalg_gemm")
 .describe(R"code(Performs general matrix multiplication and accumulation.
 Input are three tensors *A*, *B*, *C* each of dimension *n >= 2* and each
 having the same shape on the leading *n-2* dimensions. For every *n-2* dimensional index *i* let
@@ -54,14 +55,14 @@ Examples::
    A = [[1.0, 1.0], [1.0, 1.0]]
    B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
    C = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
-   linalg_gemm(A, B, C, transpose_b = 1, alpha = 2.0 , beta = 10.0)
+   gemm(A, B, C, transpose_b = 1, alpha = 2.0 , beta = 10.0)
            = [[14.0, 14.0, 14.0], [14.0, 14.0, 14.0]]
 
    // Batch matrix multiply-add
    A = [[[1.0, 1.0]], [[0.1, 0.1]]]
    B = [[[1.0, 1.0]], [[0.1, 0.1]]]
    C = [[[10.0]], [[0.01]]]
-   linalg_gemm(A, B, C, transpose_b = 1, alpha = 2.0 , beta = 10.0)
+   gemm(A, B, C, transpose_b = 1, alpha = 2.0 , beta = 10.0)
            = [[[104.0]], [[0.14]]]
 )code" ADD_FILELINE)
 .set_num_inputs(3)
@@ -91,7 +92,8 @@ NNVM_REGISTER_OP(_backward_linalg_gemm)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 4, 3, gemm_backward>);
 
-NNVM_REGISTER_OP(linalg_gemm2)
+NNVM_REGISTER_OP(_linalg_gemm2)
+.add_alias("linalg_gemm2")
 .describe(R"code(Performs general matrix multiplication.
 Input are two tensors *A*, *B* each of dimension *n >= 2* and each
 having the same shape on the leading *n-2* dimensions. For every *n-2* dimensional index *i* let
@@ -113,13 +115,13 @@ Examples::
    // Single matrix multiply
    A = [[1.0, 1.0], [1.0, 1.0]]
    B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
-   linalg_gemm2(A, B, transpose_b = 1, alpha = 2.0)
+   gemm2(A, B, transpose_b = 1, alpha = 2.0)
             = [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]]
 
    // Batch matrix multiply
    A = [[[1.0, 1.0]], [[0.1, 0.1]]]
    B = [[[1.0, 1.0]], [[0.1, 0.1]]]
-   linalg_gemm2(A, B, transpose_b = 1, alpha = 2.0 )
+   gemm2(A, B, transpose_b = 1, alpha = 2.0 )
            = [[[4.0]], [[0.04 ]]]
 )code" ADD_FILELINE)
 .set_num_inputs(2)
@@ -146,7 +148,8 @@ NNVM_REGISTER_OP(_backward_linalg_gemm2)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 3, 2, gemm2_backward>);
 
-NNVM_REGISTER_OP(linalg_potrf)
+NNVM_REGISTER_OP(_linalg_potrf)
+.add_alias("linalg_potrf")
 .describe(R"code(Performs Cholesky factorization of a symmetric positive-definite matrix.
 Input is a tensor *A* of dimension *n >= 2*. For every *n-2* dimensional index *i* let
 *A*\ :sub:`i`\  be the matrix given by the last *2* dimensions.
@@ -169,11 +172,11 @@ Examples::
 
    // Single matrix factorization
    A = [[4.0, 1.0], [1.0, 4.25]]
-   linalg_potrf(A) = [[2.0, 0], [0.5, 2.0]]
+   potrf(A) = [[2.0, 0], [0.5, 2.0]]
 
    // Batch matrix factorization
    A = [[[4.0, 1.0], [1.0, 4.25]], [[16.0, 4.0], [4.0, 17.0]]]
-   linalg_potrf(A) = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
+   potrf(A) = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
 )code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
@@ -198,7 +201,8 @@ NNVM_REGISTER_OP(_backward_linalg_potrf)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 2, 1, potrf_backward>);
 
 
-NNVM_REGISTER_OP(linalg_potri)
+NNVM_REGISTER_OP(_linalg_potri)
+.add_alias("linalg_potri")
 .describe(R"code(Performs matrix inversion from a Cholesky factorization.
 Input is a tensor *A* of dimension *n >= 2*. For every *n-2* dimensional index *i* let
 *A*\ :sub:`i`\  be the matrix given by the last *2* dimensions.
@@ -220,11 +224,11 @@ Examples::
 
    // Single matrix inverse
    A = [[2.0, 0], [0.5, 2.0]]
-   linalg_potri(A) = [[0.26563, -0.0625], [-0.0625, 0.25]]
+   potri(A) = [[0.26563, -0.0625], [-0.0625, 0.25]]
 
    // Batch matrix inverse
    A = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
-   linalg_potri(A) = [[[0.26563, -0.0625], [-0.0625, 0.25]],
+   potri(A) = [[[0.26563, -0.0625], [-0.0625, 0.25]],
                   [[0.06641, -0.01562], [-0.01562, 0.0625]]]
 )code" ADD_FILELINE)
 .set_num_inputs(1)
@@ -247,7 +251,8 @@ NNVM_REGISTER_OP(_backward_linalg_potri)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 3, 1, potri_backward>);
 
-NNVM_REGISTER_OP(linalg_trmm)
+NNVM_REGISTER_OP(_linalg_trmm)
+.add_alias("linalg_trmm")
 .describe(R"code(Performs multiplication with a triangular matrix.
 Input are two tensors *A*, *B* each of dimension *n >= 2* and each
 having the same shape on the leading *n-2* dimensions. For every *n-2* dimensional index *i* let
@@ -275,12 +280,12 @@ Examples::
    // Single matrix multiply
    A = [[1.0, 0], [1.0, 1.0]]
    B = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
-   linalg_trmm(A, B, alpha = 2.0) = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
+   trmm(A, B, alpha = 2.0) = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
 
    // Batch matrix multiply
    A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
    B = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]]
-   linalg_trmm(A, B, alpha = 2.0 ) = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
+   trmm(A, B, alpha = 2.0 ) = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
                                   [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]]
 
 )code" ADD_FILELINE)
@@ -310,7 +315,8 @@ NNVM_REGISTER_OP(_backward_linalg_trmm)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 4, 2, trmm_backward>);
 
-NNVM_REGISTER_OP(linalg_trsm)
+NNVM_REGISTER_OP(_linalg_trsm)
+.add_alias("linalg_trsm")
 .describe(R"code(Solves matrix equations involving a triangular matrix.
 Input are two tensors *A*, *B* each of dimension *n >= 2* and each
 having the same shape on the leading *n-2* dimensions. For every *n-2* dimensional index *i* let
@@ -338,13 +344,13 @@ Examples::
    // Single matrix solve
    A = [[1.0, 0], [1.0, 1.0]]
    B = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
-   linalg_trsm(A, B, alpha = 0.5) = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
+   trsm(A, B, alpha = 0.5) = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 
    // Batch matrix solve
    A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
    B = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
         [[4.0, 4.0, 4.0], [8.0, 8.0, 8.0]]]
-   linalg_trsm(A, B, alpha = 0.5 ) = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+   trsm(A, B, alpha = 0.5 ) = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
                                   [[2.0, 2.0, 2.0 ], [2.0, 2.0, 2.0]]]
 )code" ADD_FILELINE)
 .set_num_inputs(2)
@@ -373,7 +379,8 @@ NNVM_REGISTER_OP(_backward_linalg_trsm)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 4, 2, trsm_backward>);
 
-NNVM_REGISTER_OP(linalg_sumlogdiag)
+NNVM_REGISTER_OP(_linalg_sumlogdiag)
+.add_alias("linalg_sumlogdiag")
 .describe(R"code(Computes the sum of the logarithms of all diagonal elements in a matrix.
 Input is a tensor *A* of dimension *n >= 2*. For every *n-2* dimensional index *i* let
 *A*\ :sub:`i`\  be the matrix given by the last *2* dimensions.
@@ -389,11 +396,11 @@ Examples::
 
    // Single matrix reduction
    A = [[1.0, 1.0], [1.0, 7.0]]
-   linalg_sumlogdiag(A) = [1.9459]
+   sumlogdiag(A) = [1.9459]
 
    // Batch matrix reduction
    A = [[[1.0, 1.0], [1.0, 7.0]], [[3.0, 0], [0, 17.0]]]
-   linalg_sumlogdiag(A) = [1.9459, 3.9318]
+   sumlogdiag(A) = [1.9459, 3.9318]
 )code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
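
The `linalg_*` operators follow the same pattern: canonical `_linalg_*` names plus the old names kept as aliases, so under the prefix convention they should surface as `mx.nd.linalg.*` and `mx.sym.linalg.*`. A small usage sketch mirroring the gemm2 docstring example above (the namespaced name is assumed to follow the `_linalg_` prefix rule):

    import mxnet as mx

    A = mx.nd.array([[1.0, 1.0], [1.0, 1.0]])
    B = mx.nd.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
    out = mx.nd.linalg.gemm2(A, B, transpose_b=True, alpha=2.0)
    print(out.asnumpy())   # [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]] per the gemm2 example above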
diff --git a/src/operator/tensor/la_op.cu b/src/operator/tensor/la_op.cu
index a89d98f..e5d5b27 100644
--- a/src/operator/tensor/la_op.cu
+++ b/src/operator/tensor/la_op.cu
@@ -27,37 +27,37 @@
 namespace mxnet {
 namespace op {
 
-NNVM_REGISTER_OP(linalg_gemm)
+NNVM_REGISTER_OP(_linalg_gemm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 3, 1, gemm>);
 
 NNVM_REGISTER_OP(_backward_linalg_gemm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 4, 3, gemm_backward>);
 
-NNVM_REGISTER_OP(linalg_gemm2)
+NNVM_REGISTER_OP(_linalg_gemm2)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 2, 1, gemm2>);
 
 NNVM_REGISTER_OP(_backward_linalg_gemm2)
 .set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 3, 2, gemm2_backward>);
 
-NNVM_REGISTER_OP(linalg_trmm)
+NNVM_REGISTER_OP(_linalg_trmm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 2, 1, trmm>);
 
 NNVM_REGISTER_OP(_backward_linalg_trmm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 4, 2, trmm_backward>);
 
-NNVM_REGISTER_OP(linalg_trsm)
+NNVM_REGISTER_OP(_linalg_trsm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 2, 1, trsm>);
 
 NNVM_REGISTER_OP(_backward_linalg_trsm)
 .set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 4, 2, trsm_backward>);
 
-NNVM_REGISTER_OP(linalg_sumlogdiag)
+NNVM_REGISTER_OP(_linalg_sumlogdiag)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 0, 1, 1, sumlogdiag>);
 
 NNVM_REGISTER_OP(_backward_linalg_sumlogdiag)
 .set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 2, 1, sumlogdiag_backward>);
 
-NNVM_REGISTER_OP(linalg_potri)
+NNVM_REGISTER_OP(_linalg_potri)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 1, 1, potri>);
 
 NNVM_REGISTER_OP(_backward_linalg_potri)
@@ -65,7 +65,7 @@ NNVM_REGISTER_OP(_backward_linalg_potri)
 
 #if MXNET_USE_CUSOLVER == 1
 
-NNVM_REGISTER_OP(linalg_potrf)
+NNVM_REGISTER_OP(_linalg_potrf)
 .set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 1, 1, potrf>);
 
 NNVM_REGISTER_OP(_backward_linalg_potrf)
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index d409b9e..dfc4a5a 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -247,6 +247,7 @@ will return a new array with shape ``(2,1,3,4)``.
 .add_arguments(ExpandDimParam::__FIELDS__());
 
 NNVM_REGISTER_OP(slice)
+.add_alias("_sparse_slice")
 .add_alias("crop")
 .describe(R"code(Slices a contiguous region of the array.
 
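The new `_sparse_slice` alias makes `slice` show up under the sparse submodules as well; the CSRNDArray `__getitem__` change earlier in this patch uses the same op to implement row slicing. An illustrative sketch reusing the csr_matrix example from the docstrings above:

    import numpy as np
    import mxnet as mx

    data = np.array([1, 2, 3, 4, 5, 6])
    indptr = np.array([0, 2, 3, 6])
    indices = np.array([0, 2, 2, 0, 1, 2])
    a = mx.nd.sparse.csr_matrix(data, indptr, indices, (3, 3))
    print(a[1:3].asnumpy())   # rows 1..2 of the example matrix; slicing stays in CSR storage
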
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 04426d4..b2f620c 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -43,7 +43,7 @@ del test_support_vector_machine_l2_svm
 
 
 def check_countsketch(in_dim,out_dim,n):
-    sym = mx.contrib.sym.count_sketch(name='countsketch',out_dim = out_dim)
+    sym = mx.sym.contrib.count_sketch(name='countsketch',out_dim = out_dim)
     shape = [(n,in_dim), (1,in_dim),(1,in_dim)]     #shape of input x, hash h and hash s
 
     arr = [mx.nd.empty(shape[i]) for i in range(3)]
@@ -109,7 +109,7 @@ def check_ifft(shape):
             shape = tuple(lst)
             shape_old = shape
         shape = (shape[0],shape[1],shape[2],shape[3]*2)
-    sym = mx.contrib.sym.ifft(name='ifft', compute_size = 128)
+    sym = mx.sym.contrib.ifft(name='ifft', compute_size = 128)
     init = [np.random.normal(size=shape, scale=1.0)]
     arr_grad = [mx.nd.empty(shape)]
     ctx_list = [{'ctx': mx.gpu(0),'ifft_data': shape, 'type_dict': {'ifft_data': np.float32}}]
@@ -175,7 +175,7 @@ def test_ifft():
             check_ifft(shape)
 
 def check_fft(shape):
-    sym = mx.contrib.sym.fft(name='fft', compute_size = 128)
+    sym = mx.sym.contrib.fft(name='fft', compute_size = 128)
     if len(shape) == 2:
         if shape[1]%2 != 0:
             lst = list(shape)
@@ -1143,7 +1143,7 @@ def test_psroipooling_with_type():
         'psroipool_rois': np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])}
 
     # plain psroipooling
-    sym = mx.contrib.sym.PSROIPooling(spatial_scale=0.0625, output_dim=2, pooled_size=3, name='psroipool')
+    sym = mx.sym.contrib.PSROIPooling(spatial_scale=0.0625, output_dim=2, pooled_size=3, name='psroipool')
     ctx_list = [{'ctx': mx.gpu(0),
                  'psroipool_data': (1, 18, 14, 14),
                  'psroipool_rois': (2, 5),
@@ -1167,7 +1167,7 @@ def test_deformable_psroipooling_with_type():
         'deformable_psroipool_rois': np.array([[0, 10, 22, 161, 173], [0, 20, 15, 154, 160]])}
 
     # deformable psroipooling
-    sym = mx.contrib.sym.DeformablePSROIPooling(spatial_scale=0.0625, sample_per_part=4, group_size=3, pooled_size=3,
+    sym = mx.sym.contrib.DeformablePSROIPooling(spatial_scale=0.0625, sample_per_part=4, group_size=3, pooled_size=3,
                                                 output_dim=2, trans_std=0.1, no_trans=False, name='deformable_psroipool')
 
     ctx_list = [{'ctx': mx.gpu(0),
@@ -1196,7 +1196,7 @@ def test_deformable_psroipooling_with_type():
 
 def test_deformable_convolution_with_type():
     np.random.seed(1234)
-    sym = mx.contrib.sym.DeformableConvolution(num_filter=3, kernel=(3,3), name='deformable_conv')
+    sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), name='deformable_conv')
     # since atomicAdd does not support fp16 (which deformable conv uses in backward), we do not test fp16 here
     ctx_list = [{'ctx': mx.gpu(0),
                  'deformable_conv_data': (2, 2, 10, 10),
@@ -1241,7 +1241,7 @@ def test_deformable_convolution_options():
                 #  'deformable_offset': (2, 18, 7, 7),
                 #  'type_dict': {'deformable_conv_data': np.float16, 'deformable_offset': np.float16}},
                 ]
-    sym = mx.contrib.sym.DeformableConvolution(num_filter=3, kernel=(3,3), pad=(1,1), name='deformable_conv')
+    sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), pad=(1,1), name='deformable_conv')
     check_consistency(sym, ctx_list)
 
     # Stride > 1
@@ -1259,7 +1259,7 @@ def test_deformable_convolution_options():
                 # 'deformable_conv_offset': (2, 18, 3, 3),
                 #  'type_dict': {'deformable_conv_data': np.float16, 'deformable_offset': np.float16}},
                 ]
-    sym = mx.contrib.sym.DeformableConvolution(num_filter=3, kernel=(3,3), stride=(2,2), name='deformable_conv')
+    sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), stride=(2,2), name='deformable_conv')
     check_consistency(sym, ctx_list)
 
     # Dilate > 1
@@ -1277,7 +1277,7 @@ def test_deformable_convolution_options():
                 # 'deformable_conv_offset': (2, 18, 3, 3),
                 #  'type_dict': {'deformable_conv_data': np.float16, 'deformable_offset': np.float16}},
                 ]
-    sym = mx.contrib.sym.DeformableConvolution(num_filter=3, kernel=(3,3), dilate=(2,2), name='deformable_conv')
+    sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), dilate=(2,2), name='deformable_conv')
     check_consistency(sym, ctx_list)
 
     # Deformable group > 1
@@ -1295,7 +1295,7 @@ def test_deformable_convolution_options():
                 #  'deformable_conv_offset': (2, 36, 5, 5),
                 #  'type_dict': {'deformable_conv_data': np.float16, 'deformable_offset': np.float16}},
                 ]
-    sym = mx.contrib.sym.DeformableConvolution(num_filter=4, kernel=(3,3), num_deformable_group=2,
+    sym = mx.sym.contrib.DeformableConvolution(num_filter=4, kernel=(3,3), num_deformable_group=2,
                                                name='deformable_conv')
 
 def test_residual_fused():
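
A minimal sketch of the new contrib spelling, mirroring the tests above (only the namespace changes; the operator arguments are the ones the tests already use):

    import mxnet as mx

    # old: mx.contrib.sym.DeformableConvolution(...)
    conv = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3, 3),
                                                name='deformable_conv')
    # old: mx.contrib.sym.PSROIPooling(...)
    pool = mx.sym.contrib.PSROIPooling(spatial_scale=0.0625, output_dim=2,
                                       pooled_size=3, name='psroipool')
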
diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py
index 37bb562..9e1eb66 100644
--- a/tests/python/unittest/test_autograd.py
+++ b/tests/python/unittest/test_autograd.py
@@ -344,9 +344,9 @@ def test_function():
             return dx, dy
 
     f = func()
-    x = mx.nd.random_uniform(shape=(10,))
+    x = mx.nd.random.uniform(shape=(10,))
     x.attach_grad()
-    y = mx.nd.random_uniform(shape=(10,))
+    y = mx.nd.random.uniform(shape=(10,))
     y.attach_grad()
     with record():
         m, n = f(x, y)
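
The imperative samplers move the same way; a minimal sketch of taking gradients through the relocated sampler (the squared-sum function below is illustrative, not taken from the test):

    import mxnet as mx

    # old: mx.nd.random_uniform(shape=(10,))
    x = mx.nd.random.uniform(shape=(10,))
    x.attach_grad()
    with mx.autograd.record():
        y = (x * x).sum()
    y.backward()
    # x.grad now holds 2 * x
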
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 726213d..f64e95b 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -323,7 +323,7 @@ def check_split_data(x, num_slice, batch_axis, **kwargs):
 
 
 def test_split_data():
-    x = mx.nd.random_uniform(shape=(128, 33, 64))
+    x = mx.nd.random.uniform(shape=(128, 33, 64))
 
     check_split_data(x, 8, 0)
     check_split_data(x, 3, 1)
diff --git a/tests/python/unittest/test_gluon_model_zoo.py b/tests/python/unittest/test_gluon_model_zoo.py
index 6fbcf8b..756979d 100644
--- a/tests/python/unittest/test_gluon_model_zoo.py
+++ b/tests/python/unittest/test_gluon_model_zoo.py
@@ -45,7 +45,7 @@ def test_concurrent():
 
 def test_identity():
     model = Identity()
-    x = mx.nd.random_uniform(shape=(128, 33, 64))
+    x = mx.nd.random.uniform(shape=(128, 33, 64))
     mx.test_utils.assert_almost_equal(model(x).asnumpy(),
                                       x.asnumpy())
 
@@ -68,7 +68,7 @@ def test_models():
         print(model)
         if not test_pretrain:
             model.collect_params().initialize()
-        model(mx.nd.random_uniform(shape=data_shape)).wait_to_read()
+        model(mx.nd.random.uniform(shape=data_shape)).wait_to_read()
 
 
 if __name__ == '__main__':
diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py
index 9d8d76f..8a5fd90 100644
--- a/tests/python/unittest/test_module.py
+++ b/tests/python/unittest/test_module.py
@@ -587,8 +587,8 @@ def test_forward_reshape():
     mod.init_optimizer(optimizer_params={'learning_rate': 0.01})
 
     # Train with original data shapes
-    data_batch = mx.io.DataBatch(data=[mx.nd.random_uniform(0, 9, dshape1),
-                                       mx.nd.random_uniform(5, 15, dshape2)],
+    data_batch = mx.io.DataBatch(data=[mx.nd.random.uniform(0, 9, dshape1),
+                                       mx.nd.random.uniform(5, 15, dshape2)],
                                  label=[mx.nd.ones(lshape)])
     mod.forward(data_batch)
     assert mod.get_outputs()[0].shape == tuple([lshape[0], num_class])
@@ -599,8 +599,8 @@ def test_forward_reshape():
     dshape1 = (3, 3, 64, 64)
     dshape2 = (3, 3, 32, 32)
     lshape = (3,)
-    data_batch = mx.io.DataBatch(data=[mx.nd.random_uniform(0, 9, dshape1),
-                                       mx.nd.random_uniform(5, 15, dshape2)],
+    data_batch = mx.io.DataBatch(data=[mx.nd.random.uniform(0, 9, dshape1),
+                                       mx.nd.random.uniform(5, 15, dshape2)],
                                  label=[mx.nd.ones(lshape)])
     mod.forward(data_batch)
     assert mod.get_outputs()[0].shape == tuple([lshape[0], num_class])
@@ -610,8 +610,8 @@ def test_forward_reshape():
     dshape1 = (20, 3, 64, 64)
     dshape2 = (20, 3, 32, 32)
     lshape = (20,)
-    data_batch = mx.io.DataBatch(data=[mx.nd.random_uniform(3, 5, dshape1),
-                                       mx.nd.random_uniform(10, 25, dshape2)],
+    data_batch = mx.io.DataBatch(data=[mx.nd.random.uniform(3, 5, dshape1),
+                                       mx.nd.random.uniform(10, 25, dshape2)],
                                  label=[mx.nd.ones(lshape)])
     mod.forward(data_batch)
     assert mod.get_outputs()[0].shape == tuple([lshape[0], num_class])
@@ -622,8 +622,8 @@ def test_forward_reshape():
     dshape1 = (20, 3, 120, 120)
     dshape2 = (20, 3, 32, 64)
     lshape = (20,)
-    data_batch = mx.io.DataBatch(data=[mx.nd.random_uniform(0, 9, dshape1),
-                                       mx.nd.random_uniform(5, 15, dshape2)],
+    data_batch = mx.io.DataBatch(data=[mx.nd.random.uniform(0, 9, dshape1),
+                                       mx.nd.random.uniform(5, 15, dshape2)],
                                  label=[mx.nd.ones(lshape)])
     mod.forward(data_batch)
     assert mod.get_outputs()[0].shape == tuple([lshape[0], num_class])
@@ -633,8 +633,8 @@ def test_forward_reshape():
     dshape1 = (5, 3, 28, 40)
     dshape2 = (5, 3, 24, 16)
     lshape = (5,)
-    data_batch = mx.io.DataBatch(data=[mx.nd.random_uniform(0, 9, dshape1),
-                                       mx.nd.random_uniform(15, 25, dshape2)],
+    data_batch = mx.io.DataBatch(data=[mx.nd.random.uniform(0, 9, dshape1),
+                                       mx.nd.random.uniform(15, 25, dshape2)],
                                  label=[mx.nd.ones(lshape)])
     mod.forward(data_batch)
     assert mod.get_outputs()[0].shape == tuple([lshape[0], num_class])
@@ -646,8 +646,8 @@ def test_forward_reshape():
     dataset_shape2 = (30, 3, 20, 40)
     labelset_shape = (30,)
 
-    eval_dataiter = mx.io.NDArrayIter(data=[mx.nd.random_uniform(0, 9, dataset_shape1),
-                                            mx.nd.random_uniform(15, 25, dataset_shape2)],
+    eval_dataiter = mx.io.NDArrayIter(data=[mx.nd.random.uniform(0, 9, dataset_shape1),
+                                            mx.nd.random.uniform(15, 25, dataset_shape2)],
                                       label=[mx.nd.ones(labelset_shape)],
                                       batch_size=5)
     assert len(mod.score(eval_data=eval_dataiter, eval_metric='acc')) == 1
@@ -658,8 +658,8 @@ def test_forward_reshape():
     dataset_shape1 = (10, 3, 30, 30)
     dataset_shape2 = (10, 3, 20, 40)
 
-    pred_dataiter = mx.io.NDArrayIter(data=[mx.nd.random_uniform(0, 9, dataset_shape1),
-                                            mx.nd.random_uniform(15, 25, dataset_shape2)])
+    pred_dataiter = mx.io.NDArrayIter(data=[mx.nd.random.uniform(0, 9, dataset_shape1),
+                                            mx.nd.random.uniform(15, 25, dataset_shape2)])
     mod.bind(data_shapes=[('data1', dshape1), ('data2', dshape2)],
              for_training=False, force_rebind=True)
     assert mod.predict(pred_dataiter).shape == tuple([10, num_class])
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index f2c6a83..7d11dbe 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -284,7 +284,7 @@ def test_ndarray_slice():
     assert same(A[3:8].asnumpy(), A2[3:8])
 
     shape = (3,4,5,6,7)
-    A = mx.nd.random_uniform(shape=shape)
+    A = mx.nd.random.uniform(shape=shape)
     A2 = A.asnumpy()
 
     assert same(A[1,3:4,:,1:5].asnumpy(), A2[1,3:4,:,1:5])
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index ceb11ed..727312b 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -1260,7 +1260,7 @@ def test_broadcast_binary_op():
         c = mx.sym.broadcast_power(a, b)
         check_binary_op_forward(c, lambda a, b: a ** b, gen_broadcast_data, mx_nd_func=mx.nd.power)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out * a **(b - 1) * b,
-                                        g_out * a ** b * np.log(a)), gen_broadcast_data)
+                                                         g_out * a ** b * np.log(a)), gen_broadcast_data)
 
     def test_bequal(a, b):
         c = mx.sym.broadcast_equal(a, b)
@@ -3354,7 +3354,7 @@ def test_pick():
 def check_ctc_loss(acts, labels, loss_truth):
     in_var = mx.sym.Variable('input')
     labels_var = mx.sym.Variable('labels')
-    ctc = mx.contrib.sym.ctc_loss(in_var, labels_var)
+    ctc = mx.sym.contrib.ctc_loss(in_var, labels_var)
     acts_nd = mx.nd.array(acts, ctx=default_context())
     labels_nd = mx.nd.array(labels, ctx=default_context())
     exe = ctc.bind(ctx=default_context(), args=[acts_nd, labels_nd])
@@ -3397,8 +3397,8 @@ def test_quantization_op():
     min0 = mx.nd.array([0.0])
     max0 = mx.nd.array([1.0])
     a  = mx.nd.array([[0.1392, 0.5928], [0.6027, 0.8579]])
-    qa, min1, max1 = mx.contrib.nd.quantize(a, min0, max0, out_type='uint8')
-    a_ = mx.contrib.nd.dequantize(qa, min1, max1, out_type='float32')
+    qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8')
+    a_ = mx.nd.contrib.dequantize(qa, min1, max1, out_type='float32')
 
     qa_real = mx.nd.array([[35, 151], [154, 219]])
     a_real  = mx.nd.array([[0.13725491, 0.59215689], [0.60392159, 0.8588236]])
@@ -3480,7 +3480,7 @@ def test_psroipooling():
 
                     im_data_var = mx.symbol.Variable(name="im_data")
                     rois_data_var = mx.symbol.Variable(name="rois_data")
-                    op = mx.contrib.sym.PSROIPooling(data=im_data_var, rois=rois_data_var, spatial_scale=spatial_scale,
+                    op = mx.sym.contrib.PSROIPooling(data=im_data_var, rois=rois_data_var, spatial_scale=spatial_scale,
                                                      group_size=num_group, pooled_size=num_group,
                                                      output_dim=num_classes, name='test_op')
                     rtol, atol = 1e-2, 1e-3
@@ -3510,7 +3510,7 @@ def test_deformable_convolution():
                         offset_data_var = mx.symbol.Variable(name="offset_data")
                         weight_var = mx.symbol.Variable(name="weight")
                         bias_var = mx.symbol.Variable(name="bias")
-                        op = mx.contrib.sym.DeformableConvolution(name='test_op', data=im_data_var,
+                        op = mx.sym.contrib.DeformableConvolution(name='test_op', data=im_data_var,
                                                                   offset=offset_data_var,
                                                                   weight=weight_var, bias=bias_var,
                                                                   num_filter=num_channel_data, pad=dilate,
@@ -3544,7 +3544,7 @@ def test_deformable_psroipooling():
                     im_data_var = mx.symbol.Variable(name="im_data")
                     rois_data_var = mx.symbol.Variable(name="rois_data")
                     offset_data_var = mx.symbol.Variable(name="offset_data")
-                    op = mx.contrib.sym.DeformablePSROIPooling(data=im_data_var, rois=rois_data_var,
+                    op = mx.sym.contrib.DeformablePSROIPooling(data=im_data_var, rois=rois_data_var,
                                                                trans=offset_data_var, spatial_scale=spatial_scale,
                                                                sample_per_part=4, group_size=num_group,
                                                                pooled_size=num_group, output_dim=num_classes,
@@ -3585,22 +3585,22 @@ def test_laop():
     data_in1_t = np.transpose(data_in1)
     data_in2_t = np.transpose(data_in2)
     res_gemm = 4*np.dot(data_in1,data_in2)+7*data_in4
-    test_gemm = mx.sym.linalg_gemm(data1, data2, data3, alpha = 4, beta = 7)
+    test_gemm = mx.sym.linalg.gemm(data1, data2, data3, alpha = 4, beta = 7)
     check_symbolic_forward(test_gemm, [data_in1, data_in2, data_in4], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in2, data_in4], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1_t,data_in2_t)+7*data_in3
-    test_gemm = mx.sym.linalg_gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_a = 1, transpose_b = 1)
+    test_gemm = mx.sym.linalg.gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_a = 1, transpose_b = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in2, data_in3], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in2, data_in3], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1_t,data_in1)+7*data_in3
-    test_gemm = mx.sym.linalg_gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_a = 1)
+    test_gemm = mx.sym.linalg.gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_a = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in1, data_in3], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in1, data_in3], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1,data_in1_t)+7*data_in4
-    test_gemm = mx.sym.linalg_gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_b = 1)
+    test_gemm = mx.sym.linalg.gemm(data1, data2, data3, alpha = 4, beta = 7, transpose_b = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in1, data_in4], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in1, data_in4], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
@@ -3615,29 +3615,29 @@ def test_laop():
     r = 4*np.dot(data_in1,data_in2)+7*data_in4
     r = np.tile(r.flatten(),3)
     r = np.reshape(r,(3,1,2,2))
-    test_gemm = mx.sym.linalg_gemm(data1, data2, data3, alpha = 4, beta = 7)
+    test_gemm = mx.sym.linalg.gemm(data1, data2, data3, alpha = 4, beta = 7)
     check_symbolic_forward(test_gemm, [a, b, c], [r])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [a, b, c], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
 
     # Check gemm2 operator same way as gemm.
     res_gemm = 4*np.dot(data_in1,data_in2)
-    test_gemm = mx.sym.linalg_gemm2(data1, data2, alpha = 4)
+    test_gemm = mx.sym.linalg.gemm2(data1, data2, alpha = 4)
     check_symbolic_forward(test_gemm, [data_in1, data_in2], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in2], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1_t, data_in2_t)
-    test_gemm = mx.sym.linalg_gemm2(data1, data2, alpha = 4, transpose_a = 1, transpose_b = 1)
+    test_gemm = mx.sym.linalg.gemm2(data1, data2, alpha = 4, transpose_a = 1, transpose_b = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in2], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in2], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1_t,data_in1)
-    test_gemm = mx.sym.linalg_gemm2(data1, data2, alpha = 4, transpose_a = 1)
+    test_gemm = mx.sym.linalg.gemm2(data1, data2, alpha = 4, transpose_a = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in1], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in1], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
     res_gemm = 4*np.dot(data_in1,data_in1_t)
-    test_gemm = mx.sym.linalg_gemm2(data1, data2, alpha = 4, transpose_b = 1)
+    test_gemm = mx.sym.linalg.gemm2(data1, data2, alpha = 4, transpose_b = 1)
     check_symbolic_forward(test_gemm, [data_in1, data_in1], [res_gemm])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [data_in1, data_in1], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
@@ -3650,7 +3650,7 @@ def test_laop():
     r = 4*np.dot(data_in1,data_in2)
     r = np.tile(r.flatten(),3)
     r = np.reshape(r,(3,1,2,2))
-    test_gemm = mx.sym.linalg_gemm2(data1, data2, alpha = 4)
+    test_gemm = mx.sym.linalg.gemm2(data1, data2, alpha = 4)
     check_symbolic_forward(test_gemm, [a, b], [r])
     if grad_check == 1:
       check_numeric_gradient(test_gemm, [a, b], numeric_eps=1e-3, rtol=1e-1, atol=1e-1)
@@ -3662,32 +3662,32 @@ def test_laop():
     data_in = np.random.uniform(1, 10, shape)
     # test potrf
     res_potrf = np.sqrt(data_in)
-    test_potrf = mx.sym.linalg_potrf(data1)
+    test_potrf = mx.sym.linalg.potrf(data1)
     check_symbolic_forward(test_potrf, [data_in], [res_potrf])
     if grad_check == 1:
       check_numeric_gradient(test_potrf, [data_in])
     # test potri
     ones = mx.nd.ones(shape).asnumpy()
     res_potri = np.divide(ones,data_in*data_in)
-    test_potri = mx.sym.linalg_potri(data1)
+    test_potri = mx.sym.linalg.potri(data1)
     check_symbolic_forward(test_potri, [data_in], [res_potri])
     if grad_check == 1:
       check_numeric_gradient(test_potri, [data_in], atol = 0.01, rtol = 1.5)
     # test trsm
     trian_in = data_in *7
-    test_trsm = mx.sym.linalg_trsm(data1,data2,alpha = 7)
+    test_trsm = mx.sym.linalg.trsm(data1,data2,alpha = 7)
     check_symbolic_forward(test_trsm, [trian_in,data_in], [ones])
     if grad_check == 1:
       check_numeric_gradient(test_trsm, [trian_in,data_in], atol = 0.02, rtol = 2.0)
     # test trmm
     trian_in = np.divide(ones,trian_in)
-    test_trmm = mx.sym.linalg_trmm(data1,data2,alpha = 7, transpose = 1, rightside = 1)
+    test_trmm = mx.sym.linalg.trmm(data1,data2,alpha = 7, transpose = 1, rightside = 1)
     check_symbolic_forward(test_trmm, [trian_in,data_in], [ones])
     if grad_check == 1:
       check_numeric_gradient(test_trmm, [trian_in,data_in], atol = 0.02, rtol = 2.0)
     # test sumlogdiag
     res_sumlogdiag = np.reshape(np.log(data_in),(4,4))
-    test_sumlogdiag = mx.sym.linalg_sumlogdiag(data1)
+    test_sumlogdiag = mx.sym.linalg.sumlogdiag(data1)
     check_symbolic_forward(test_sumlogdiag, [data_in], [res_sumlogdiag])
     if grad_check == 1:
       check_numeric_gradient(test_sumlogdiag, [data_in], atol = 0.01, rtol = 2.0)
@@ -3733,14 +3733,14 @@ def test_laop():
     if grad_check == 1:
       check_numeric_gradient(test_trsm, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trsm2 = mx.sym.linalg_trsm(data1,data2,alpha = -2, rightside = 1, transpose = 1)
+    test_trsm2 = mx.sym.linalg.trsm(data1,data2,alpha = -2, rightside = 1, transpose = 1)
     r = -2*np.reshape(np.array(trian),(4,4))
     r = np.reshape(np.tile(np.reshape(r,(16)),3),(3,1,4,4))
     check_symbolic_forward(test_trsm2, [a,b], [r])
     if grad_check == 1:
       check_numeric_gradient(test_trsm2, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trsm3 = mx.sym.linalg_trsm(data1,data2,alpha = 0.50, transpose = 1)
+    test_trsm3 = mx.sym.linalg.trsm(data1,data2,alpha = 0.50, transpose = 1)
     b = np.transpose(np.reshape(np.array(trian),(4,4)))
     b = np.reshape(np.tile(np.reshape(b,(16)),3),(3,1,4,4))
     r = 0.5*np.reshape(np.array(ident),(4,4))
@@ -3749,7 +3749,7 @@ def test_laop():
     if grad_check == 1:
       check_numeric_gradient(test_trsm3, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trsm4 = mx.sym.linalg_trsm(data1,data2,alpha = -0.5, rightside = 1)
+    test_trsm4 = mx.sym.linalg.trsm(data1,data2,alpha = -0.5, rightside = 1)
     b = np.tile(np.array(trian),3)
     b = np.reshape(b,(3,1,4,4))
     r = -0.5*np.reshape(np.array(ident),(4,4))
@@ -3769,21 +3769,21 @@ def test_laop():
     if grad_check == 1:
       check_numeric_gradient(test_trmm, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trmm2 = mx.sym.linalg_trmm(data1,data2,alpha = -2)
+    test_trmm2 = mx.sym.linalg.trmm(data1,data2,alpha = -2)
     r = -2*np.dot(np.reshape(np.array(trian),(4,4)),np.reshape(np.array(matrix),(4,4)))
     r = np.reshape(np.tile(np.reshape(r,(16)),3),(3,1,4,4))
     check_symbolic_forward(test_trmm2, [a,b], [r])
     if grad_check == 1:
       check_numeric_gradient(test_trmm2, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trmm3 = mx.sym.linalg_trmm(data1,data2,rightside = 1)
+    test_trmm3 = mx.sym.linalg.trmm(data1,data2,rightside = 1)
     r = np.dot(np.reshape(np.array(matrix),(4,4)),np.reshape(np.array(trian),(4,4)))
     r = np.reshape(np.tile(np.reshape(r,(16)),3),(3,1,4,4))
     check_symbolic_forward(test_trmm3, [a,b], [r])
     if grad_check == 1:
       check_numeric_gradient(test_trmm3, [a,b], numeric_eps=1e-3, rtol=1e-2, atol=1e-1)
 
-    test_trmm4 = mx.sym.linalg_trmm(data1,data2,alpha = 1.2,transpose = 1)
+    test_trmm4 = mx.sym.linalg.trmm(data1,data2,alpha = 1.2,transpose = 1)
     r = 1.2*np.dot(np.transpose(np.reshape(np.array(trian),(4,4))),np.reshape(np.array(matrix),(4,4)))
     r = np.reshape(np.tile(np.reshape(r,(16)),3),(3,1,4,4))
     check_symbolic_forward(test_trmm4, [a,b], [r])
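
A minimal sketch of the relocated linear-algebra operators, executed the same way the ctc_loss test binds its symbol (the matrix values below are illustrative):

    import mxnet as mx
    import numpy as np

    A = mx.sym.Variable('A')
    B = mx.sym.Variable('B')
    # old: mx.sym.linalg_gemm2(...)
    C = mx.sym.linalg.gemm2(A, B, alpha=4.0, transpose_b=1)

    a = np.array([[1., 2.], [3., 4.]])
    b = np.array([[5., 6.], [7., 8.]])
    exe = C.bind(ctx=mx.cpu(), args={'A': mx.nd.array(a), 'B': mx.nd.array(b)})
    out = exe.forward()[0].asnumpy()    # should equal 4 * np.dot(a, b.T)
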
diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py
index 6b8311c..01c8b0a 100644
--- a/tests/python/unittest/test_random.py
+++ b/tests/python/unittest/test_random.py
@@ -27,7 +27,7 @@ def check_with_device(device, dtype):
     symbols = [
         {
             'name': 'normal',
-            'symbol': mx.sym.random_normal,
+            'symbol': mx.sym.random.normal,
             'multisymbol': mx.sym.sample_normal,
             'ndop': mx.random.normal,
             'params': { 'loc': 10.0, 'scale': 0.5 },
@@ -39,7 +39,7 @@ def check_with_device(device, dtype):
         },
         {
             'name': 'uniform',
-            'symbol': mx.sym.random_uniform,
+            'symbol': mx.sym.random.uniform,
             'multisymbol': mx.sym.sample_uniform,
             'ndop': mx.random.uniform,
             'params': { 'low': -1.5, 'high': 3.0 },
@@ -54,7 +54,7 @@ def check_with_device(device, dtype):
         symbols.extend([
             {
                 'name': 'gamma',
-                'symbol': mx.sym.random_gamma,
+                'symbol': mx.sym.random.gamma,
                 'multisymbol': mx.sym.sample_gamma,
                 'ndop': mx.random.gamma,
                 'params': { 'alpha': 9.0, 'beta': 0.5 },
@@ -66,7 +66,7 @@ def check_with_device(device, dtype):
             },
             {
                 'name': 'exponential',
-                'symbol': mx.sym.random_exponential,
+                'symbol': mx.sym.random.exponential,
                 'multisymbol': mx.sym.sample_exponential,
                 'ndop': mx.random.exponential,
                 'params': { 'lam': 4.0 },
@@ -78,7 +78,7 @@ def check_with_device(device, dtype):
             },
             {
                 'name': 'poisson',
-                'symbol': mx.sym.random_poisson,
+                'symbol': mx.sym.random.poisson,
                 'ndop': mx.random.poisson,
                 'multisymbol': mx.sym.sample_poisson,
                 'params': { 'lam': 4.0 },
@@ -90,7 +90,7 @@ def check_with_device(device, dtype):
             },
             {
                 'name': 'neg-binomial',
-                'symbol': mx.sym.random_negative_binomial,
+                'symbol': mx.sym.random.negative_binomial,
                 'multisymbol': mx.sym.sample_negative_binomial,
                 'ndop': mx.random.negative_binomial,
                 'params': { 'k': 3, 'p': 0.4 },
@@ -102,7 +102,7 @@ def check_with_device(device, dtype):
             },
             {
                 'name': 'gen-neg-binomial',
-                'symbol': mx.sym.random_generalized_negative_binomial,
+                'symbol': mx.sym.random.generalized_negative_binomial,
                 'multisymbol': mx.sym.sample_generalized_negative_binomial,
                 'ndop': mx.random.generalized_negative_binomial,
                 'params': { 'mu': 2.0, 'alpha': 0.3 },
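
Random samplers follow the same grouping at both the symbol and NDArray level; a minimal sketch (loc/scale as in the table above, the shape is illustrative):

    import mxnet as mx

    # old: mx.sym.random_normal / mx.nd.random_normal
    s = mx.sym.random.normal(loc=10.0, scale=0.5, shape=(100,))   # symbolic
    x = mx.nd.random.normal(loc=10.0, scale=0.5, shape=(100,))    # imperative
    print(x.asnumpy().mean())   # should land near loc = 10.0
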
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index a773434..35d9713 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -348,7 +348,7 @@ def test_sparse_nd_transpose():
 def test_sparse_nd_output_fallback():
     shape = (10, 10)
     out = mx.nd.zeros(shape=shape, stype='row_sparse')
-    mx.nd.random_normal(shape=shape, out=out)
+    mx.nd.random.normal(shape=shape, out=out)
     assert(np.sum(out.asnumpy()) != 0)
 
 def test_sparse_nd_random():
@@ -357,7 +357,7 @@ def test_sparse_nd_random():
     if default_context().device_type is 'gpu':
         return
     shape = (100, 100)
-    fns = [mx.nd.random_uniform, mx.nd.random_normal, mx.nd.random_gamma]
+    fns = [mx.nd.random.uniform, mx.nd.random.normal, mx.nd.random.gamma]
     for fn in fns:
         rsp_out = mx.nd.zeros(shape=shape, stype='row_sparse')
         dns_out = mx.nd.zeros(shape=shape, stype='default')
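
A minimal sketch of the output fallback exercised by the two tests above (the shape is illustrative):

    import mxnet as mx

    out = mx.nd.zeros(shape=(100, 100), stype='row_sparse')
    # old: mx.nd.random_normal(shape=..., out=out)
    mx.nd.random.normal(shape=(100, 100), out=out)
    assert out.asnumpy().sum() != 0    # sampling fell back to a dense write
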
