Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/09/28 00:28:22 UTC

[incubator-mxnet] branch master updated: [v2.0] Update Examples (#20602)

This is an automated email from the ASF dual-hosted git repository.

zhenghuijin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new a975637  [v2.0] Update Examples (#20602)
a975637 is described below

commit a975637b19867c839135c6e9296be97e2622b704
Author: Zhenghui Jin <69...@users.noreply.github.com>
AuthorDate: Mon Sep 27 17:27:02 2021 -0700

    [v2.0] Update Examples (#20602)
    
    * update examples
    
    * update block.py
---
 LICENSE                                            |    1 -
 example/adversary/adversary_generation.ipynb       |  222 +--
 example/autoencoder/README.md                      |   37 -
 .../autoencoder/convolutional_autoencoder.ipynb    |  538 ------
 example/automatic-mixed-precision/README.md        |   29 -
 .../amp_model_conversion.py                        |  201 --
 example/bi-lstm-sort/bi-lstm-sort.ipynb            |  301 ++-
 example/gluon/actor_critic/actor_critic.py         |   28 +-
 example/gluon/audio/README.md                      |  115 --
 example/gluon/audio/transforms.py                  |  205 ---
 example/gluon/audio/urban_sounds/README.md         |  117 --
 example/gluon/audio/urban_sounds/datasets.py       |  179 --
 example/gluon/audio/urban_sounds/model.py          |   33 -
 example/gluon/audio/urban_sounds/predict.py        |   92 -
 example/gluon/audio/urban_sounds/requirements.txt  |    2 -
 example/gluon/audio/urban_sounds/train.py          |  157 --
 example/gluon/data.py                              |   10 +-
 example/gluon/dc_gan/README.md                     |   69 -
 example/gluon/dc_gan/__init__.py                   |   16 -
 example/gluon/dc_gan/dcgan.py                      |  355 ----
 example/gluon/dc_gan/inception_score.py            |  110 --
 example/gluon/embedding_learning/README.md         |   93 -
 example/gluon/embedding_learning/data.py           |  158 --
 .../gluon/embedding_learning/get_cub200_data.sh    |   34 -
 example/gluon/embedding_learning/model.py          |  230 ---
 example/gluon/embedding_learning/train.py          |  255 ---
 .../house_prices/kaggle_k_fold_cross_validation.py |   25 +-
 example/gluon/lipnet/.gitignore                    |    3 -
 example/gluon/lipnet/BeamSearch.py                 |  170 --
 example/gluon/lipnet/README.md                     |  254 ---
 example/gluon/lipnet/asset/mouth_000.png           |  Bin 6372 -> 0 bytes
 example/gluon/lipnet/asset/mouth_001.png           |  Bin 6826 -> 0 bytes
 example/gluon/lipnet/asset/mouth_074.png           |  Bin 6864 -> 0 bytes
 example/gluon/lipnet/asset/network_structure.png   |  Bin 183728 -> 0 bytes
 example/gluon/lipnet/asset/s2_bbbf7p_000.png       |  Bin 35141 -> 0 bytes
 example/gluon/lipnet/asset/s2_bbbf7p_001.png       |  Bin 36768 -> 0 bytes
 example/gluon/lipnet/asset/s2_bbbf7p_074.png       |  Bin 38248 -> 0 bytes
 example/gluon/lipnet/checkpoint/__init__.py        |   16 -
 example/gluon/lipnet/data_loader.py                |   94 -
 example/gluon/lipnet/infer.py                      |   52 -
 example/gluon/lipnet/main.py                       |   47 -
 example/gluon/lipnet/models/__init__.py            |   16 -
 example/gluon/lipnet/models/network.py             |   73 -
 example/gluon/lipnet/requirements.txt              |    7 -
 example/gluon/lipnet/tests/test_beamsearch.py      |   42 -
 example/gluon/lipnet/trainer.py                    |  232 ---
 example/gluon/lipnet/utils/__init__.py             |   16 -
 example/gluon/lipnet/utils/align.py                |   83 -
 example/gluon/lipnet/utils/common.py               |   80 -
 example/gluon/lipnet/utils/download_data.py        |  112 --
 example/gluon/lipnet/utils/multi.py                |  104 --
 example/gluon/lipnet/utils/preprocess_data.py      |  262 ---
 example/gluon/lipnet/utils/run_preprocess.ipynb    |  194 --
 .../utils/run_preprocess_single_process.ipynb      |  360 ----
 example/gluon/lstm_crf/README.md                   |   36 -
 example/gluon/lstm_crf/lstm_crf.py                 |  241 ---
 example/gluon/mnist/mnist.py                       |    8 +-
 example/gluon/sn_gan/README.md                     |   61 -
 example/gluon/sn_gan/data.py                       |   42 -
 example/gluon/sn_gan/model.py                      |  139 --
 example/gluon/sn_gan/sn_gan_output.png             |  Bin 404415 -> 0 bytes
 example/gluon/sn_gan/train.py                      |  149 --
 example/gluon/sn_gan/utils.py                      |   49 -
 example/gluon/style_transfer/README.md             |  134 --
 example/gluon/style_transfer/data.py               |  125 --
 .../style_transfer/dataset/download_dataset.py     |   37 -
 example/gluon/style_transfer/download_images.py    |   20 -
 example/gluon/style_transfer/main.py               |  231 ---
 .../gluon/style_transfer/models/download_model.py  |   31 -
 example/gluon/style_transfer/net.py                |  296 ---
 example/gluon/style_transfer/option.py             |  109 --
 example/gluon/style_transfer/utils.py              |  229 ---
 example/gluon/super_resolution/super_resolution.py |   22 +-
 example/gluon/tree_lstm/LICENSE                    |   21 -
 example/gluon/tree_lstm/README.md                  |   46 -
 example/gluon/tree_lstm/dataset.cPickle            |  Bin 17896237 -> 0 bytes
 example/gluon/tree_lstm/dataset.py                 |  231 ---
 example/gluon/tree_lstm/fetch_and_preprocess.sh    |   25 -
 .../tree_lstm/lib/CollapseUnaryTransformer.java    |   53 -
 example/gluon/tree_lstm/lib/ConstituencyParse.java |  253 ---
 example/gluon/tree_lstm/lib/DependencyParse.java   |  159 --
 example/gluon/tree_lstm/main.py                    |  191 --
 example/gluon/tree_lstm/scripts/download.py        |  106 --
 example/gluon/tree_lstm/scripts/preprocess-sick.py |  122 --
 example/gluon/tree_lstm/tree_lstm.py               |  154 --
 example/gluon/word_language_model/README.md        |  104 --
 example/gluon/word_language_model/model-graph.png  |  Bin 126048 -> 0 bytes
 example/gluon/word_language_model/model.py         |   64 -
 example/gluon/word_language_model/train.py         |  225 ---
 example/multi-task/multi-task-learning.ipynb       |  287 ++-
 example/multi_threaded_inference/Makefile          |   65 -
 example/multi_threaded_inference/README.md         |   19 -
 .../multi_threaded_inference.cc                    |  356 ----
 example/quantization/imagenet_gen_qsym_onednn.py   |    2 +-
 example/quantization/imagenet_inference.py         |    4 +-
 example/recommenders/demo1-MF.ipynb                | 1195 +++---------
 example/recommenders/demo2-dssm.ipynb              | 1936 ++------------------
 example/recommenders/matrix_fact.py                |    2 +-
 example/recommenders/movielens_data.py             |    6 +-
 example/restricted-boltzmann-machine/README.md     |   82 -
 example/restricted-boltzmann-machine/binary_rbm.py |  253 ---
 .../binary_rbm_gluon.py                            |  142 --
 example/restricted-boltzmann-machine/samples.png   |  Bin 191570 -> 0 bytes
 example/rnn/README.md                              |   35 -
 example/rnn/bucket_R/aclImdb_lstm_classification.R |   92 -
 .../rnn/bucket_R/data_preprocessing_seq_to_one.R   |  191 --
 python/mxnet/gluon/block.py                        |    3 +-
 107 files changed, 883 insertions(+), 13329 deletions(-)
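
Most of the example updates below migrate the notebooks from the legacy mx.nd NDArray API to the NumPy-compatible mx.np / mx.npx namespaces used by MXNet 2.0. A minimal sketch of the recurring substitutions (variable names are illustrative and not part of the commit):

    import mxnet as mx

    # NumPy-compatible front end used by the updated examples
    x = mx.np.random.uniform(low=0, high=1, size=(4, 5))   # was mx.random.uniform(..., shape=...)
    s = mx.np.sign(x)                                       # was mx.nd.sign(x)
    v = x.mean().item()                                     # was x.mean().asscalar()
    oh = mx.npx.one_hot(mx.np.array([1, 2, 3]), 11)         # was mx.nd.one_hot(..., 11)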

diff --git a/LICENSE b/LICENSE
index 1eca980..ded6a01 100644
--- a/LICENSE
+++ b/LICENSE
@@ -242,7 +242,6 @@
 
     3rdparty/miniz/miniz.c
     3rdparty/miniz/miniz.h
-    example/gluon/tree_lstm
     3rdparty/tvm/3rdparty/cma
     3rdparty/onnx-tensorrt
     3rdparty/onnx-tensorrt/third_party/onnx
diff --git a/example/adversary/adversary_generation.ipynb b/example/adversary/adversary_generation.ipynb
index 0dda371..9f8cf99 100644
--- a/example/adversary/adversary_generation.ipynb
+++ b/example/adversary/adversary_generation.ipynb
@@ -2,7 +2,6 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "# Fast Sign Adversary Generation Example\n",
     "\n",
@@ -10,15 +9,12 @@
     "\n",
     "[1] Goodfellow, Ian J., Jonathon Shlens, and Christian Szegedy. \"Explaining and harnessing adversarial examples.\" arXiv preprint arXiv:1412.6572 (2014).\n",
     "https://arxiv.org/abs/1412.6572"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
    "source": [
     "%matplotlib inline\n",
     "import mxnet as mx\n",
@@ -28,39 +24,41 @@
     "import matplotlib.cm as cm\n",
     "\n",
     "from mxnet import gluon"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": false
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "Build simple CNN network for solving the MNIST dataset digit recognition task"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
    "source": [
     "ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()\n",
     "batch_size = 128"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": true
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Data Loading"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "transform = lambda x,y: (x.transpose((2,0,1)).astype('float32')/255., y)\n",
     "\n",
@@ -69,22 +67,20 @@
     "\n",
     "train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=5)\n",
     "test_data = gluon.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Create the network"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
    "source": [
     "net = gluon.nn.HybridSequential()\n",
     "with net.name_scope():\n",
@@ -97,73 +93,63 @@
     "        gluon.nn.Dense(500, activation='tanh'),\n",
     "        gluon.nn.Dense(10)\n",
     "    )"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": true
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Initialize training"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
    "source": [
     "net.initialize(mx.initializer.Uniform(), ctx=ctx)\n",
     "net.hybridize()"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": true
+   }
   },
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
    "source": [
     "loss = gluon.loss.SoftmaxCELoss()"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": true
+   }
   },
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
    "source": [
     "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1, 'momentum':0.95})"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "collapsed": true
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Training loop"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Train Accuracy: 0.92\t Train Loss: 0.32142\n",
-      "Train Accuracy: 0.97\t Train Loss: 0.16773\n",
-      "Train Accuracy: 0.97\t Train Loss: 0.14660\n"
-     ]
-    }
-   ],
    "source": [
     "epoch = 3\n",
     "for e in range(epoch):\n",
@@ -180,35 +166,39 @@
     "        l.backward()\n",
     "        trainer.update(data.shape[0])\n",
     "        \n",
-    "        train_loss += l.mean().asscalar()\n",
+    "        train_loss += l.mean().item()\n",
     "        acc.update(label, output)\n",
     "    \n",
     "    print(\"Train Accuracy: %.2f\\t Train Loss: %.5f\" % (acc.get()[1], train_loss/(i+1)))"
-   ]
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Train Accuracy: 0.92\t Train Loss: 0.32142\n",
+      "Train Accuracy: 0.97\t Train Loss: 0.16773\n",
+      "Train Accuracy: 0.97\t Train Loss: 0.14660\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "collapsed": false
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Perturbation\n",
     "\n",
     "We first run a validation batch and measure the resulting accuracy.\n",
     "We then perturbate this batch by modifying the input in the opposite direction of the gradient."
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Validation batch accuracy 0.96875\n"
-     ]
-    }
-   ],
    "source": [
     "# Get a batch from the testing set\n",
     "for data, label in test_data:\n",
@@ -227,32 +217,30 @@
     "acc.update(label, output)\n",
     "\n",
     "print(\"Validation batch accuracy {}\".format(acc.get()[1]))"
-   ]
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Validation batch accuracy 0.96875\n"
+     ]
+    }
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "Now we perturb the input"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Validation batch accuracy after perturbation 0.40625\n"
-     ]
-    }
-   ],
    "source": [
-    "data_perturbated = data + 0.15 * mx.nd.sign(data.grad)\n",
+    "data_perturbated = data + 0.15 * mx.np.sign(data.grad)\n",
     "\n",
     "output = net(data_perturbated)    \n",
     "\n",
@@ -260,58 +248,70 @@
     "acc.update(label, output)\n",
     "\n",
     "print(\"Validation batch accuracy after perturbation {}\".format(acc.get()[1]))"
-   ]
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Validation batch accuracy after perturbation 0.40625\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "collapsed": false
+   }
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Visualization"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "Let's visualize an example after pertubation.\n",
     "\n",
     "We can see that the prediction is often incorrect."
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "source": [
+    "from random import randint\n",
+    "idx = randint(0, batch_size-1)\n",
+    "\n",
+    "plt.imshow(data_perturbated[idx, :].asnumpy().reshape(28,28), cmap=cm.Greys_r)\n",
+    "print(\"true label: %d\" % label.asnumpy()[idx])\n",
+    "print(\"predicted: %d\" % np.argmax(output.asnumpy(), axis=1)[idx])"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "true label: 1\n",
       "predicted: 3\n"
      ]
     },
     {
+     "output_type": "display_data",
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADpJJREFUeJzt3V+IXeW5x/Hfc9JsNbbMmLbGkAQdgxwZAxoZY+EMJy1tgo2F2AuluSg5IE0vIrbQi4q9qJeh9A9eSHGqobG2ScVWDConsaFgS0p1FI/G8VRNSWmGJGOxpCnIjJk8vdgrZYx7r7Wz1989z/cDw+xZ715rPbMmv6y997vW+5q7C0A8/1F3AQDqQfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwT1sSp31mq1fNmyZaVs+/Tp06Vs97yhoaHa9p0lrbYmq/O41X3M0n73 [...]
       "text/plain": [
        "<Figure size 432x288 with 1 Axes>"
-      ]
+      ],
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADpJJREFUeJzt3V+IXeW5x/Hfc9JsNbbMmLbGkAQdgxwZAxoZY+EMJy1tgo2F2AuluSg5IE0vIrbQi4q9qJeh9A9eSHGqobG2ScVWDConsaFgS0p1FI/G8VRNSWmGJGOxpCnIjJk8vdgrZYx7r7Wz1989z/cDw+xZ715rPbMmv6y997vW+5q7C0A8/1F3AQDqQfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwT1sSp31mq1fNmyZaVs+/Tp06Vs97yhoaHa9p0lrbYmq/O41X3M0n73 [...]
      },
-     "metadata": {},
-     "output_type": "display_data"
+     "metadata": {}
     }
    ],
-   "source": [
-    "from random import randint\n",
-    "idx = randint(0, batch_size-1)\n",
-    "\n",
-    "plt.imshow(data_perturbated[idx, :].asnumpy().reshape(28,28), cmap=cm.Greys_r)\n",
-    "print(\"true label: %d\" % label.asnumpy()[idx])\n",
-    "print(\"predicted: %d\" % np.argmax(output.asnumpy(), axis=1)[idx])"
-   ]
+   "metadata": {
+    "collapsed": false
+   }
   }
  ],
  "metadata": {
@@ -335,4 +335,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
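
For context on the perturbation hunk above: the updated cell builds the fast-gradient-sign perturbation with mx.np.sign(data.grad), which assumes the input batch carries a gradient from a prior backward pass. A minimal sketch of that flow, reusing the notebook's net, loss, data and label objects:

    data.attach_grad()                   # give the input batch a gradient buffer
    with mx.autograd.record():
        output = net(data)
        l = loss(output, label)
    l.backward()                         # populates data.grad
    data_perturbated = data + 0.15 * mx.np.sign(data.grad)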
diff --git a/example/autoencoder/README.md b/example/autoencoder/README.md
deleted file mode 100644
index 9db075e..0000000
--- a/example/autoencoder/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Example of a Convolutional Autoencoder
-
-Autoencoder architectures are often used for unsupervised feature learning. This [link](http://ufldl.stanford.edu/tutorial/unsupervised/Autoencoders/) contains an introduction tutorial to autoencoders. This example illustrates a simple autoencoder using a stack of convolutional layers for both the encoder and the decoder. 
-
-
-![](https://cdn-images-1.medium.com/max/800/1*LSYNW5m3TN7xRX61BZhoZA.png)
-
-([Diagram source](https://towardsdatascience.com/autoencoders-introduction-and-implementation-3f40483b0a85))
-
-
-The idea of an autoencoder is to learn to use bottleneck architecture to encode the input and then try to decode it to reproduce the original. By doing so, the network learns to effectively compress the information of the input, the resulting embedding representation can then be used in several domains. For example as featurized representation for visual search, or in anomaly detection.
-
-## Dataset
-
-The dataset used in this example is [FashionMNIST](https://github.com/zalandoresearch/fashion-mnist) dataset. 
-
-## Variational Autoencoder
-
-You can check an example of variational autoencoder [here](https://gluon.mxnet.io/chapter13_unsupervised-learning/vae-gluon.html)
-
diff --git a/example/autoencoder/convolutional_autoencoder.ipynb b/example/autoencoder/convolutional_autoencoder.ipynb
deleted file mode 100644
index a18ee55..0000000
--- a/example/autoencoder/convolutional_autoencoder.ipynb
+++ /dev/null
@@ -1,538 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Convolutional Autoencoder"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "![](https://cdn-images-1.medium.com/max/800/1*LSYNW5m3TN7xRX61BZhoZA.png)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In this example we will demonstrate how you can create a convolutional autoencoder in Gluon"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import random\n",
-    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "import mxnet as mx\n",
-    "from mxnet import autograd, gluon"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Data\n",
-    "\n",
-    "We will use the FashionMNIST dataset, which is of a similar format to MNIST but is richer and has more variance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "batch_size = 512\n",
-    "ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "transform = lambda x,y: (x.transpose((2,0,1)).astype('float32')/255., y)\n",
-    "\n",
-    "train_dataset = gluon.data.vision.FashionMNIST(train=True)\n",
-    "test_dataset = gluon.data.vision.FashionMNIST(train=False)\n",
-    "\n",
-    "train_dataset_t = train_dataset.transform(transform)\n",
-    "test_dataset_t = test_dataset.transform(transform)\n",
-    "\n",
-    "train_data = gluon.data.DataLoader(train_dataset_t, batch_size=batch_size, last_batch='rollover', shuffle=True, num_workers=5)\n",
-    "test_data = gluon.data.DataLoader(test_dataset_t, batch_size=batch_size, last_batch='rollover', shuffle=True, num_workers=5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAABIEAAACBCAYAAABXearSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXm4VmW5/2+q0+QQ5iwITkwOCKKghWLOSuZsNqk5HI+WiXoqT9ox09LqKr2wKK/UIjNLvRrMIU3AMENESHECkUkEHBFTGk51PH/8fjx9n297Pb1uNnu/77s+n7/utZ9nr7Xe9YxrXff3vnu9/vrrAQAAAAAAAAAA7c2bevoGAAAAAAAAAABg7cNHIAAAAAAAAACAGsBHIAAAAAAAAACAGsBHIAAAAAAAAACAGsBHIAAAAAAAAACAGsBHIAAAAAAAAACAGsBHIAAAAAAA [...]
-      "text/plain": [
-       "<Figure size 1440x720 with 10 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plt.figure(figsize=(20,10))\n",
-    "for i in range(10):\n",
-    "    ax = plt.subplot(1, 10, i+1)\n",
-    "    ax.imshow(train_dataset[i][0].squeeze().asnumpy(), cmap='gray')\n",
-    "    ax.axis('off')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Network"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "net = gluon.nn.HybridSequential()\n",
-    "encoder = gluon.nn.HybridSequential()\n",
-    "encoder.add(\n",
-    "    gluon.nn.Conv2D(channels=4, kernel_size=3, padding=1, strides=(2,2), activation='relu'),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=8, kernel_size=3, padding=1, strides=(2,2), activation='relu'),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=16, kernel_size=3, padding=1, strides=(2,2), activation='relu'),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=32, kernel_size=3, padding=0, strides=(2,2),activation='relu'),\n",
-    "    gluon.nn.BatchNorm()\n",
-    ")\n",
-    "decoder = gluon.nn.HybridSequential()\n",
-    "decoder.add(\n",
-    "    gluon.nn.Conv2D(channels=32, kernel_size=3, padding=2, activation='relu'),\n",
-    "    gluon.nn.HybridLambda(lambda F, x: F.UpSampling(x, scale=2, sample_type='nearest')),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=16, kernel_size=3, padding=1, activation='relu'),\n",
-    "    gluon.nn.HybridLambda(lambda F, x: F.UpSampling(x, scale=2, sample_type='nearest')),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=8, kernel_size=3, padding=2, activation='relu'),\n",
-    "    gluon.nn.HybridLambda(lambda F, x: F.UpSampling(x, scale=2, sample_type='nearest')),\n",
-    "    gluon.nn.BatchNorm(),\n",
-    "    gluon.nn.Conv2D(channels=4, kernel_size=3, padding=1, activation='relu'),\n",
-    "    gluon.nn.Conv2D(channels=1, kernel_size=3, padding=1, activation='sigmoid')\n",
-    ")\n",
-    "net.add(\n",
-    "        encoder,\n",
-    "        decoder\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "net.initialize(ctx=ctx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--------------------------------------------------------------------------------\n",
-      "        Layer (type)                                Output Shape         Param #\n",
-      "================================================================================\n",
-      "               Input                              (1, 1, 28, 28)               0\n",
-      "        Activation-1  <Symbol autoencoder_encoder_conv0_relu_fwd>               0\n",
-      "        Activation-2                              (1, 4, 14, 14)               0\n",
-      "            Conv2D-3                              (1, 4, 14, 14)              40\n",
-      "         BatchNorm-4                              (1, 4, 14, 14)              16\n",
-      "        Activation-5  <Symbol autoencoder_encoder_conv1_relu_fwd>               0\n",
-      "        Activation-6                                (1, 8, 7, 7)               0\n",
-      "            Conv2D-7                                (1, 8, 7, 7)             296\n",
-      "         BatchNorm-8                                (1, 8, 7, 7)              32\n",
-      "        Activation-9  <Symbol autoencoder_encoder_conv2_relu_fwd>               0\n",
-      "       Activation-10                               (1, 16, 4, 4)               0\n",
-      "           Conv2D-11                               (1, 16, 4, 4)            1168\n",
-      "        BatchNorm-12                               (1, 16, 4, 4)              64\n",
-      "       Activation-13  <Symbol autoencoder_encoder_conv3_relu_fwd>               0\n",
-      "       Activation-14                               (1, 32, 1, 1)               0\n",
-      "           Conv2D-15                               (1, 32, 1, 1)            4640\n",
-      "        BatchNorm-16                               (1, 32, 1, 1)             128\n",
-      "       Activation-17  <Symbol autoencoder_decoder_conv0_relu_fwd>               0\n",
-      "       Activation-18                               (1, 32, 3, 3)               0\n",
-      "           Conv2D-19                               (1, 32, 3, 3)            9248\n",
-      "     HybridLambda-20                               (1, 32, 6, 6)               0\n",
-      "        BatchNorm-21                               (1, 32, 6, 6)             128\n",
-      "       Activation-22  <Symbol autoencoder_decoder_conv1_relu_fwd>               0\n",
-      "       Activation-23                               (1, 16, 6, 6)               0\n",
-      "           Conv2D-24                               (1, 16, 6, 6)            4624\n",
-      "     HybridLambda-25                             (1, 16, 12, 12)               0\n",
-      "        BatchNorm-26                             (1, 16, 12, 12)              64\n",
-      "       Activation-27  <Symbol autoencoder_decoder_conv2_relu_fwd>               0\n",
-      "       Activation-28                              (1, 8, 14, 14)               0\n",
-      "           Conv2D-29                              (1, 8, 14, 14)            1160\n",
-      "     HybridLambda-30                              (1, 8, 28, 28)               0\n",
-      "        BatchNorm-31                              (1, 8, 28, 28)              32\n",
-      "       Activation-32  <Symbol autoencoder_decoder_conv3_relu_fwd>               0\n",
-      "       Activation-33                              (1, 4, 28, 28)               0\n",
-      "           Conv2D-34                              (1, 4, 28, 28)             292\n",
-      "       Activation-35  <Symbol autoencoder_decoder_conv4_sigmoid_fwd>               0\n",
-      "       Activation-36                              (1, 1, 28, 28)               0\n",
-      "           Conv2D-37                              (1, 1, 28, 28)              37\n",
-      "================================================================================\n",
-      "Parameters in forward computation graph, duplicate included\n",
-      "   Total params: 21969\n",
-      "   Trainable params: 21737\n",
-      "   Non-trainable params: 232\n",
-      "Shared params in forward computation graph: 0\n",
-      "Unique parameters in model: 21969\n",
-      "--------------------------------------------------------------------------------\n"
-     ]
-    }
-   ],
-   "source": [
-    "net.summary(test_dataset_t[0][0].expand_dims(axis=0).as_in_context(ctx))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We can see that the original image goes from 28x28 = 784 pixels to a vector of length 32. That is a ~25x information compression rate.\n",
-    "Then the decoder brings back this compressed information to the original shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "l2_loss = gluon.loss.L2Loss()\n",
-    "l1_loss = gluon.loss.L1Loss()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.001, 'wd':0.001})\n",
-    "net.hybridize(static_shape=True, static_alloc=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Training loop"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch [0], Loss 0.2246280246310764\n",
-      "Epoch [1], Loss 0.14493223337026742\n",
-      "Epoch [2], Loss 0.13147933666522688\n",
-      "Epoch [3], Loss 0.12138325943906084\n",
-      "Epoch [4], Loss 0.11291297684367906\n",
-      "Epoch [5], Loss 0.10611823453741559\n",
-      "Epoch [6], Loss 0.09942417470817892\n",
-      "Epoch [7], Loss 0.09408332955124032\n",
-      "Epoch [8], Loss 0.08883619716024807\n",
-      "Epoch [9], Loss 0.08491455795418502\n",
-      "Epoch [10], Loss 0.0809355994402352\n",
-      "Epoch [11], Loss 0.07784551636785524\n",
-      "Epoch [12], Loss 0.07570812029716296\n",
-      "Epoch [13], Loss 0.07417513366438384\n",
-      "Epoch [14], Loss 0.07218785571236895\n",
-      "Epoch [15], Loss 0.07093704352944584\n",
-      "Epoch [16], Loss 0.0700181406787318\n",
-      "Epoch [17], Loss 0.0689836893326197\n",
-      "Epoch [18], Loss 0.06782063459738708\n",
-      "Epoch [19], Loss 0.06713279088338216\n"
-     ]
-    }
-   ],
-   "source": [
-    "epochs = 20\n",
-    "for e in range(epochs):\n",
-    "    curr_loss = 0.\n",
-    "    for i, (data, _) in enumerate(train_data):\n",
-    "        data = data.as_in_context(ctx)\n",
-    "        with autograd.record():\n",
-    "            output = net(data)\n",
-    "            # Compute the L2 and L1 losses between the original and the generated image\n",
-    "            l2 = l2_loss(output.flatten(), data.flatten())\n",
-    "            l1 = l1_loss(output.flatten(), data.flatten())\n",
-    "            l =  l2 + l1 \n",
-    "        l.backward()\n",
-    "        trainer.step(data.shape[0])\n",
-    "        \n",
-    "        curr_loss += l.mean()\n",
-    "\n",
-    "    print(\"Epoch [{}], Loss {}\".format(e, curr_loss.asscalar()/(i+1)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Testing reconstruction"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We plot 10 images and their reconstruction by the autoencoder. The results are pretty good for a ~25x compression rate!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAABIEAAAD4CAYAAAB7VPbbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsvWe8XVXVvj1QsQKBkJCQQiqBhEQ6gRBaIIAoIE1BimAB8UcVBfUvIGIBFSygPHZ/IihFRVSU3psGQpASEtJIJw0CKPqIvB98mc8975w13Qmn7LPXdX0aO3Oetdeefa2Me4y1Xn311QAAAAAAAAAAgNbmDV19AwAAAAAAAAAA0PHwEggAAAAAAAAAoAbwEggAAAAAAAAAoAbwEggAAAAAAAAAoAbwEggAAAAAAAAAoAbwEggAAAAAAAAAoAbwEggAAAAAAAAAoAbwEggA [...]
-      "text/plain": [
-       "<Figure size 1440x288 with 20 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plt.figure(figsize=(20,4))\n",
-    "for i in range(10):\n",
-    "    idx = random.randint(0, len(test_dataset))\n",
-    "    img, _ = test_dataset[idx]\n",
-    "    x, _ = test_dataset_t[idx]\n",
-    "\n",
-    "    data = x.as_in_context(ctx).expand_dims(axis=0)\n",
-    "    output = net(data)\n",
-    "    \n",
-    "    ax = plt.subplot(2, 10, i+1)\n",
-    "    ax.imshow(img.squeeze().asnumpy(), cmap='gray')\n",
-    "    ax.axis('off')\n",
-    "    ax = plt.subplot(2, 10, 10+i+1)\n",
-    "    ax.imshow((output[0].asnumpy() * 255.).transpose((1,2,0)).squeeze(), cmap='gray')\n",
-    "    _ = ax.axis('off')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Manipulating latent space"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We now use separately the **encoder** that takes an image to a latent vector and the **decoder** that transform a latent vector into images"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We get two images from the testing set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJIAAACPCAYAAAARM4LLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAACsxJREFUeJztnduLFdkVxr9le7/ftdXWUdFRCUJkCMYEEaOo8zIP4hWCoOBLAgkEzEzyByiCeRCDIEYnD9EYiKAEYYjaAwbjoNHBqENPa7z1qPF+v7buPHR5sven59Q5fbbn1LG/HzRdX+06Vbu7V++9au1Vq8w5ByHKpVO1OyDeD2RIIgoyJBEFGZKIggxJREGGJKIgQxJRkCGJKJRlSGY238yazOysmX0aq1Oi9rD2RrbNrA7AtwDmAmgBcBTAMufcmQKfURi99rjpnBuSdlA5I9IP [...]
-      "text/plain": [
-       "<Figure size 144x144 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.image.AxesImage at 0x7f04995adc50>"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJIAAACPCAYAAAARM4LLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAACZRJREFUeJztnUtsVdcVhv+Feb8JD2Nsg4OwKjFAqhRVoFYC0SJoJmFUBUHEIBKTVmqlSCRph0zKpLNOkEDpoHJVqZWSQSSrRNSoUIE9iKgJAkwRD2Owzdvmadgd3Bv37D/xvde+y/eew/k/yeL851zfsxP93nudvddex0IIEKJaZtS7AeLNQEYSLshIwgUZSbggIwkXZCThgowkXJCRhAtVGcnMdpnZRTPrM7NPvBolsodNdWbbzBoAXAKwA8BNAN0A9oQQvinxO6mdRm9tbS15fWxs [...]
-      "text/plain": [
-       "<Figure size 144x144 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "idx = random.randint(0, len(test_dataset))\n",
-    "img1, _ = test_dataset[idx]\n",
-    "x, _ = test_dataset_t[idx]\n",
-    "data1 = x.as_in_context(ctx).expand_dims(axis=0)\n",
-    "\n",
-    "idx = random.randint(0, len(test_dataset))\n",
-    "img2, _ = test_dataset[idx]\n",
-    "x, _ = test_dataset_t[idx]\n",
-    "data2 = x.as_in_context(ctx).expand_dims(axis=0)\n",
-    "\n",
-    "plt.figure(figsize=(2,2))\n",
-    "plt.imshow(img1.squeeze().asnumpy(), cmap='gray')\n",
-    "plt.show()\n",
-    "plt.figure(figsize=(2,2))\n",
-    "plt.imshow(img2.squeeze().asnumpy(), cmap='gray')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We get the latent representations of the images by passing them through the network"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "latent1 = encoder(data1)\n",
-    "latent2 = encoder(data2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We see that the latent vector is made of 32 components"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(1, 32, 1, 1)"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "latent1.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We interpolate the two latent representations, vectors of 32 values, to get a new intermediate latent representation, pass it through the decoder and plot the resulting decoded image"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAABIEAAACBCAYAAABXearSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3WmMXlUdx/GDC/vSZVraaaEtpS1d6AIFoYSqBRUqpCAISCIkShBBUZREjb4QQnjhQmKMJmCihqAoCCIKmoJhL1vL0tZSoHTfpmVaWmhR2XyBHH7nN3MOT6fzzDzz3O/n1X9679zn9p57zr3z5Pz/Z4933nknAAAAAAAAoLl9qLdPAAAAAAAAAPXHl0AAAAAAAAAVwJdAAAAAAAAAFcCXQAAAAAAAABXAl0AAAAAAAAAVwJdAAAAAAAAAFcCXQAAAAAAAABXAl0AAAAAA [...]
-      "text/plain": [
-       "<Figure size 1440x360 with 10 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "num = 10\n",
-    "plt.figure(figsize=(20, 5))\n",
-    "\n",
-    "for i in range(int(num)):\n",
-    "    \n",
-    "        new_latent = latent2*(i+1)/num + latent1*(num-i)/num\n",
-    "        output = decoder(new_latent)\n",
-    "        \n",
-    "        #plot result\n",
-    "        ax = plt.subplot(1, num, i+1)\n",
-    "        ax.imshow((output[0].asnumpy() * 255.).transpose((1,2,0)).squeeze(), cmap='gray')\n",
-    "        _ = ax.axis('off')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We can see that the latent space learnt by the autoencoder is fairly smooth, there is no sudden jump from one shape to another"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/example/automatic-mixed-precision/README.md b/example/automatic-mixed-precision/README.md
deleted file mode 100644
index 334828a..0000000
--- a/example/automatic-mixed-precision/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Conversion of FP32 models to Mixed Precision Models
-
-
-This folder contains examples for converting FP32 models to mixed precision models. The script allows for converting FP32 symbolic models or gluon models to mixed precision model.
-
-## Basic Usages
-
-AMP Model Conversion for a gluon model, casting the params wherever possible to FP16. The below script will convert the `resnet101_v1` model to Mixed Precision Model and cast params to FP16 wherever possible, load this converted model and run inference on it.
-
-```bash
-python amp_model_conversion.py --model resnet101_v1  --run-dummy-inference --cast-optional-params
-```
diff --git a/example/automatic-mixed-precision/amp_model_conversion.py b/example/automatic-mixed-precision/amp_model_conversion.py
deleted file mode 100644
index 22af4f3..0000000
--- a/example/automatic-mixed-precision/amp_model_conversion.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import logging
-import argparse
-import mxnet as mx
-from common import modelzoo
-import gluoncv
-from gluoncv.model_zoo import get_model
-from mxnet import amp
-import numpy as np
-
-
-def save_symbol(fname, sym, logger=None):
-    if logger is not None:
-        logger.info('Saving symbol into file at {}'.format(fname))
-    sym.save(fname, remove_amp_cast=False)
-
-
-def save_params(fname, arg_params, aux_params, logger=None):
-    if logger is not None:
-        logger.info('Saving params into file at {}'.format(fname))
-    save_dict = {('arg:%s' % k): v.as_in_context(mx.cpu()) for k, v in arg_params.items()}
-    save_dict.update({('aux:%s' % k): v.as_in_context(mx.cpu()) for k, v in aux_params.items()})
-    mx.nd.save(fname, save_dict)
-
-
-if __name__ == '__main__':
-    # Faster RCNN and Mask RCNN commented because of model loading issues
-    # https://github.com/dmlc/gluon-cv/issues/1034
-    gluon_models = [#'faster_rcnn_fpn_resnet50_v1b_coco',
-                    'mobilenetv2_0.75',
-                    'cifar_resnet56_v1',
-                    'mobilenet0.25',
-                    'mobilenet1.0',
-                    #'mask_rcnn_fpn_resnet50_v1b_coco',
-                    'simple_pose_resnet152_v1b',
-                    'ssd_512_resnet50_v1_voc',
-                    #'faster_rcnn_resnet50_v1b_voc',
-                    'cifar_resnet20_v1',
-                    'yolo3_darknet53_voc',
-                    'resnet101_v1c',
-                    'simple_pose_resnet18_v1b',
-                    #'mask_rcnn_resnet50_v1b_coco',
-                    'ssd_512_mobilenet1.0_coco',
-                    'vgg19_bn',
-                    #'faster_rcnn_resnet50_v1b_coco',
-                    'cifar_resnet110_v1',
-                    'yolo3_mobilenet1.0_voc',
-                    'cifar_resnext29_16x64d',
-                    'resnet34_v1',
-                    'densenet121',
-                    #'mask_rcnn_fpn_resnet101_v1d_coco',
-                    'vgg13_bn',
-                    'vgg19',
-                    'resnet152_v1d',
-                    'resnet152_v1s',
-                    'densenet201',
-                    'alexnet',
-                    'se_resnext50_32x4d',
-                    'resnet50_v1d_0.86',
-                    'resnet18_v1b_0.89',
-                    'yolo3_darknet53_coco',
-                    'resnet152_v1',
-                    'resnext101_64x4d',
-                    'vgg13',
-                    'resnet101_v1d_0.76',
-                    'simple_pose_resnet50_v1d',
-                    'senet_154',
-                    'resnet50_v1',
-                    'se_resnext101_32x4d',
-                    'fcn_resnet101_voc',
-                    'resnet152_v2',
-                    #'mask_rcnn_resnet101_v1d_coco',
-                    'squeezenet1.1',
-                    'mobilenet0.5',
-                    'resnet34_v2',
-                    'resnet18_v1',
-                    'resnet152_v1b',
-                    'resnet101_v2',
-                    'cifar_resnet56_v2',
-                    'ssd_512_resnet101_v2_voc',
-                    'resnet50_v1d_0.37',
-                    'mobilenetv2_0.5',
-                    #'faster_rcnn_fpn_bn_resnet50_v1b_coco',
-                    'resnet50_v1c',
-                    'densenet161',
-                    'simple_pose_resnet50_v1b',
-                    'resnet18_v1b',
-                    'darknet53',
-                    'fcn_resnet50_ade',
-                    'cifar_wideresnet28_10',
-                    'simple_pose_resnet101_v1d',
-                    'vgg16',
-                    'ssd_512_resnet50_v1_coco',
-                    'resnet101_v1d_0.73',
-                    'squeezenet1.0',
-                    'resnet50_v1b',
-                    #'faster_rcnn_resnet101_v1d_coco',
-                    'ssd_512_mobilenet1.0_voc',
-                    'cifar_wideresnet40_8',
-                    'cifar_wideresnet16_10',
-                    'cifar_resnet110_v2',
-                    'resnet101_v1s',
-                    'mobilenetv2_0.25',
-                    'resnet152_v1c',
-                    'se_resnext101_64x4d',
-                    #'faster_rcnn_fpn_resnet101_v1d_coco',
-                    'resnet50_v1d',
-                    'densenet169',
-                    'resnet34_v1b',
-                    'resnext50_32x4d',
-                    'resnet101_v1',
-                    'resnet101_v1b',
-                    'resnet50_v1s',
-                    'mobilenet0.75',
-                    'cifar_resnet20_v2',
-                    'resnet101_v1d',
-                    'vgg11_bn',
-                    'resnet18_v2',
-                    'vgg11',
-                    'simple_pose_resnet101_v1b',
-                    'resnext101_32x4d',
-                    'resnet50_v2',
-                    'vgg16_bn',
-                    'mobilenetv2_1.0',
-                    'resnet50_v1d_0.48',
-                    'resnet50_v1d_0.11',
-                    'fcn_resnet101_ade',
-                    'simple_pose_resnet152_v1d',
-                    'yolo3_mobilenet1.0_coco',
-                    'fcn_resnet101_coco']
-    # TODO(anisub): add support for other models from gluoncv
-    # Not supported today mostly because of broken net.forward calls
-    segmentation_models = ['deeplab_resnet50_ade',
-                           'psp_resnet101_voc',
-                           'deeplab_resnet152_voc',
-                           'deeplab_resnet101_ade',
-                           'deeplab_resnet152_coco',
-                           'psp_resnet101_ade',
-                           'deeplab_resnet101_coco',
-                           'psp_resnet101_citys',
-                           'psp_resnet50_ade',
-                           'psp_resnet101_coco',
-                           'deeplab_resnet101_voc']
-    calib_ssd_models = ["ssd_512_vgg16_atrous_voc",
-                        "ssd_300_vgg16_atrous_voc",
-                        "ssd_300_vgg16_atrous_coco"]
-    calib_inception_models = ["inceptionv3"]
-    gluon_models = gluon_models + segmentation_models + \
-                   calib_ssd_models + calib_inception_models
-    models = gluon_models
-
-    parser = argparse.ArgumentParser(description='Convert a provided FP32 model to a mixed precision model')
-    parser.add_argument('--model', type=str, choices=models)
-    parser.add_argument('--run-dummy-inference', action='store_true', default=False,
-                        help='Will generate random input of shape (1, 3, 224, 224) '
-                             'and run a dummy inference forward pass')
-    parser.add_argument('--cast-optional-params', action='store_true', default=False,
-                        help='If enabled, will try to cast params to target dtype wherever possible')
-    args = parser.parse_args()
-    logging.basicConfig()
-    logger = logging.getLogger('logger')
-    logger.setLevel(logging.INFO)
-
-    assert args.model in gluon_models, "Please choose one of the available gluon models: {}".format(gluon_models)
-    shape = None
-    if args.model in segmentation_models:
-        shape = (1, 3, 480, 480)
-    elif args.model in calib_ssd_models:
-        shape = (1, 3, 512, 544)
-    elif args.model in calib_inception_models:
-        shape = (1, 3, 299, 299)
-    else:
-        shape = (1, 3, 224, 224)
-    net = gluoncv.model_zoo.get_model(args.model, pretrained=True)
-    net.hybridize()
-    result_before1 = net.forward(mx.nd.random.uniform(shape=shape))
-    net.export("{}".format(args.model))
-    net = amp.convert_hybrid_block(net, cast_optional_params=args.cast_optional_params)
-    net.export("{}-amp".format(args.model), remove_amp_cast=False)
-    if args.run_dummy_inference:
-        logger.info("Running inference on the mixed precision model with dummy inputs, batch size: 1")
-        result_after = net.forward(mx.nd.random.uniform(shape=shape, dtype=np.float32, ctx=mx.gpu(0)))
-        result_after = net.forward(mx.nd.random.uniform(shape=shape, dtype=np.float32, ctx=mx.gpu(0)))
-        logger.info("Inference run successfully")
diff --git a/example/bi-lstm-sort/bi-lstm-sort.ipynb b/example/bi-lstm-sort/bi-lstm-sort.ipynb
index 5d18be3..df9a9c5 100644
--- a/example/bi-lstm-sort/bi-lstm-sort.ipynb
+++ b/example/bi-lstm-sort/bi-lstm-sort.ipynb
@@ -2,37 +2,35 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "# Using a bi-lstm to sort a sequence of integers"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "import random\n",
     "import string\n",
     "\n",
     "import mxnet as mx\n",
-    "from mxnet import gluon, nd\n",
-    "import numpy as np"
-   ]
+    "from mxnet import gluon, np\n",
+    "import numpy as onp"
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Data Preparation"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "max_num = 999\n",
     "dataset_size = 60000\n",
@@ -40,11 +38,12 @@
     "split = 0.8\n",
     "batch_size = 512\n",
     "ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "We are getting a dataset of **dataset_size** sequences of integers of length **seq_len** between **0** and **max_num**. We use **split*100%** of them for training and the rest for testing.\n",
     "\n",
@@ -56,68 +55,68 @@
     "Should return\n",
     "\n",
     "10 30 50 200 999"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
    "source": [
-    "X = mx.random.uniform(low=0, high=max_num, shape=(dataset_size, seq_len)).astype('int32').asnumpy()\n",
+    "X = mx.np.random.uniform(low=0, high=max_num, size=(dataset_size, seq_len)).astype('int32').asnumpy()\n",
     "Y = X.copy()\n",
     "Y.sort() #Let's sort X to get the target"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "source": [
+    "print(\"Input {}\\nTarget {}\".format(X[0].tolist(), Y[0].tolist()))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Input [548, 592, 714, 843, 602]\n",
       "Target [548, 592, 602, 714, 843]\n"
      ]
     }
    ],
-   "source": [
-    "print(\"Input {}\\nTarget {}\".format(X[0].tolist(), Y[0].tolist()))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "For the purpose of training, we encode the input as characters rather than numbers"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "source": [
+    "vocab = string.digits + \" \"\n",
+    "print(vocab)\n",
+    "vocab_idx = { c:i for i,c in enumerate(vocab)}\n",
+    "print(vocab_idx)"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "0123456789 \n",
       "{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, ' ': 10}\n"
      ]
     }
    ],
-   "source": [
-    "vocab = string.digits + \" \"\n",
-    "print(vocab)\n",
-    "vocab_idx = { c:i for i,c in enumerate(vocab)}\n",
-    "print(vocab_idx)"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "We write a transform that will convert our numbers into text of maximum length **max_len**, and one-hot encode the characters.\n",
     "For example:\n",
@@ -125,31 +124,30 @@
     "\"30 10\" corresponding indices are [3, 0, 10, 1, 0]\n",
     "\n",
     "We then one hot encode that and get a matrix representation of our input. We don't need to encode our target as the loss we are going to use support sparse labels"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {},
+   "source": [
+    "max_len = len(str(max_num))*seq_len+(seq_len-1)\n",
+    "print(\"Maximum length of the string: %s\" % max_len)"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Maximum length of the string: 19\n"
      ]
     }
    ],
-   "source": [
-    "max_len = len(str(max_num))*seq_len+(seq_len-1)\n",
-    "print(\"Maximum length of the string: %s\" % max_len)"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "def transform(x, y):\n",
     "    x_string = ' '.join(map(str, x.tolist()))\n",
@@ -158,28 +156,35 @@
     "    y_string = ' '.join(map(str, y.tolist()))\n",
     "    y_string_padded = y_string + ' '*(max_len-len(y_string))\n",
     "    y = [vocab_idx[c] for c in y_string_padded]\n",
-    "    return mx.nd.one_hot(mx.nd.array(x), len(vocab)), mx.nd.array(y)"
-   ]
+    "    return mx.npx.one_hot(mx.nd.array(x), len(vocab)), mx.np.array(y)"
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "split_idx = int(split*len(X))\n",
     "train_dataset = gluon.data.ArrayDataset(X[:split_idx], Y[:split_idx]).transform(transform)\n",
     "test_dataset = gluon.data.ArrayDataset(X[split_idx:], Y[split_idx:]).transform(transform)"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
+   "source": [
+    "print(\"Input {}\".format(X[0]))\n",
+    "print(\"Transformed data Input {}\".format(train_dataset[0][0]))\n",
+    "print(\"Target {}\".format(Y[0]))\n",
+    "print(\"Transformed data Target {}\".format(train_dataset[0][1]))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Input [548 592 714 843 602]\n",
       "Transformed data Input \n",
@@ -211,103 +216,115 @@
      ]
     }
    ],
-   "source": [
-    "print(\"Input {}\".format(X[0]))\n",
-    "print(\"Transformed data Input {}\".format(train_dataset[0][0]))\n",
-    "print(\"Target {}\".format(Y[0]))\n",
-    "print(\"Transformed data Target {}\".format(train_dataset[0][1]))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=20, last_batch='rollover')\n",
     "test_data = gluon.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=5, last_batch='rollover')"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Creating the network"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "net = gluon.nn.HybridSequential()\n",
-    "with net.name_scope():\n",
-    "    net.add(\n",
-    "        gluon.rnn.LSTM(hidden_size=128, num_layers=2, layout='NTC', bidirectional=True),\n",
-    "        gluon.nn.Dense(len(vocab), flatten=False)\n",
-    "    )"
-   ]
+    "net.add(\n",
+    "    gluon.rnn.LSTM(hidden_size=128, num_layers=2, layout='NTC', bidirectional=True),\n",
+    "    gluon.nn.Dense(len(vocab), flatten=False)\n",
+    ")"
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "net.initialize(mx.init.Xavier(), ctx=ctx)"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "loss = gluon.loss.SoftmaxCELoss()"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "We use a learning rate schedule to improve the convergence of the model"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "schedule = mx.lr_scheduler.FactorScheduler(step=len(train_data)*10, factor=0.75)\n",
     "schedule.base_lr = 0.01"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate':0.01, 'lr_scheduler':schedule})"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Training loop"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {},
+   "source": [
+    "epochs = 100\n",
+    "for e in range(epochs):\n",
+    "    epoch_loss = 0.\n",
+    "    for i, (data, label) in enumerate(train_data):\n",
+    "        data = data.as_in_context(ctx)\n",
+    "        label = label.as_in_context(ctx)\n",
+    "\n",
+    "        with mx.autograd.record():\n",
+    "            output = net(data)\n",
+    "            l = loss(output, label)\n",
+    "\n",
+    "        l.backward()\n",
+    "        trainer.step(data.shape[0])\n",
+    "    \n",
+    "        epoch_loss += l.mean()\n",
+    "        \n",
+    "    print(\"Epoch [{}] Loss: {}, LR {}\".format(e, epoch_loss.item()/(i+1), trainer.learning_rate))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Epoch [0] Loss: 1.6627886372227823, LR 0.01\n",
       "Epoch [1] Loss: 1.210370733382854, LR 0.01\n",
@@ -412,82 +429,68 @@
      ]
     }
    ],
-   "source": [
-    "epochs = 100\n",
-    "for e in range(epochs):\n",
-    "    epoch_loss = 0.\n",
-    "    for i, (data, label) in enumerate(train_data):\n",
-    "        data = data.as_in_context(ctx)\n",
-    "        label = label.as_in_context(ctx)\n",
-    "\n",
-    "        with mx.autograd.record():\n",
-    "            output = net(data)\n",
-    "            l = loss(output, label)\n",
-    "\n",
-    "        l.backward()\n",
-    "        trainer.step(data.shape[0])\n",
-    "    \n",
-    "        epoch_loss += l.mean()\n",
-    "        \n",
-    "    print(\"Epoch [{}] Loss: {}, LR {}\".format(e, epoch_loss.asscalar()/(i+1), trainer.learning_rate))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "## Testing"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "We get a random element from the testing set"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "n = random.randint(0, len(test_data)-1)\n",
     "\n",
     "x_orig = X[split_idx+n]\n",
     "y_orig = Y[split_idx+n]"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 41,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "def get_pred(x):\n",
     "    x, _ = transform(x, x)\n",
-    "    output = net(x.as_in_context(ctx).expand_dims(axis=0))\n",
+    "    output = net(mx.np.expand_dims(x.as_in_ctx(ctx), axis=0))\n",
     "\n",
     "    # Convert output back to string\n",
     "    pred = ''.join([vocab[int(o)] for o in output[0].argmax(axis=1).asnumpy().tolist()])\n",
     "    return pred"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "Printing the result"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 43,
-   "metadata": {},
+   "source": [
+    "x_ = ' '.join(map(str,x_orig))\n",
+    "label = ' '.join(map(str,y_orig))\n",
+    "print(\"X         {}\\nPredicted {}\\nLabel     {}\".format(x_, get_pred(x_orig), label))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "X         611 671 275 871 944\n",
       "Predicted 275 611 671 871 944\n",
@@ -495,92 +498,88 @@
      ]
     }
    ],
-   "source": [
-    "x_ = ' '.join(map(str,x_orig))\n",
-    "label = ' '.join(map(str,y_orig))\n",
-    "print(\"X         {}\\nPredicted {}\\nLabel     {}\".format(x_, get_pred(x_orig), label))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "We can also pick our own example, and the network manages to sort it without problem:"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 66,
-   "metadata": {},
+   "source": [
+    "print(get_pred(onp.array([500, 30, 999, 10, 130])))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "10 30 130 500 999  \n"
      ]
     }
    ],
-   "source": [
-    "print(get_pred(np.array([500, 30, 999, 10, 130])))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "The model has even learned to generalize to examples not on the training set"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 64,
-   "metadata": {},
+   "source": [
+    "print(\"Only four numbers:\", get_pred(onp.array([105, 302, 501, 202])))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Only four numbers: 105 202 302 501    \n"
      ]
     }
    ],
-   "source": [
-    "print(\"Only four numbers:\", get_pred(np.array([105, 302, 501, 202])))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "However we can see it has trouble with other edge cases:"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": 63,
-   "metadata": {},
+   "source": [
+    "print(\"Small digits:\", get_pred(onp.array([10, 3, 5, 2, 8])))\n",
+    "print(\"Small digits, 6 numbers:\", get_pred(onp.array([10, 33, 52, 21, 82, 10])))"
+   ],
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
+     "name": "stdout",
      "text": [
       "Small digits: 8  0 42 28         \n",
       "Small digits, 6 numbers: 10 0 20 82 71 115  \n"
      ]
     }
    ],
-   "source": [
-    "print(\"Small digits:\", get_pred(np.array([10, 3, 5, 2, 8])))\n",
-    "print(\"Small digits, 6 numbers:\", get_pred(np.array([10, 33, 52, 21, 82, 10])))"
-   ]
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "This could be improved by adjusting the training dataset accordingly"
-   ]
+   ],
+   "metadata": {}
   }
  ],
  "metadata": {
@@ -604,4 +603,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
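For readers following the notebook migration above, here is a minimal stand-alone sketch of the numpy-interface encoding step, assuming MXNet 2.0 with the `mx.np`/`mx.npx` front end; the vocabulary, padding length, and sample input mirror the notebook and are purely illustrative:

    import mxnet as mx

    vocab = "0123456789 "                      # digits plus the space separator
    vocab_idx = {c: i for i, c in enumerate(vocab)}
    max_len = 19                               # 5 numbers of up to 3 digits + 4 spaces

    def encode(numbers):
        # Join the numbers into a padded string, map characters to vocabulary indices,
        # then one-hot encode with the numpy-extension operator.
        s = ' '.join(map(str, numbers))
        s = s + ' ' * (max_len - len(s))
        idx = [vocab_idx[c] for c in s]
        return mx.npx.one_hot(mx.np.array(idx), len(vocab))

    x = encode([548, 592, 714, 843, 602])
    print(x.shape)                             # (19, 11): one row per character, one column per vocab entry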
diff --git a/example/gluon/actor_critic/actor_critic.py b/example/gluon/actor_critic/actor_critic.py
index 6d4474b..8a043f3 100644
--- a/example/gluon/actor_critic/actor_critic.py
+++ b/example/gluon/actor_critic/actor_critic.py
@@ -20,13 +20,12 @@ from __future__ import print_function
 import argparse
 import gym
 from itertools import count
-import numpy as np
+import numpy as onp
 
 import mxnet as mx
-import mxnet.ndarray as F
 from mxnet import gluon
 from mxnet.gluon import nn
-from mxnet import autograd
+from mxnet import autograd, npx
 
 
 parser = argparse.ArgumentParser(description='MXNet actor-critic example')
@@ -48,16 +47,15 @@ env.seed(args.seed)
 class Policy(gluon.Block):
     def __init__(self, **kwargs):
         super(Policy, self).__init__(**kwargs)
-        with self.name_scope():
-            self.dense = nn.Dense(16, in_units=4, activation='relu')
-            self.action_pred = nn.Dense(2, in_units=16)
-            self.value_pred = nn.Dense(1, in_units=16)
+        self.dense = nn.Dense(16, in_units=4, activation='relu')
+        self.action_pred = nn.Dense(2, in_units=16)
+        self.value_pred = nn.Dense(1, in_units=16)
 
     def forward(self, x):
         x = self.dense(x)
         probs = self.action_pred(x)
         values = self.value_pred(x)
-        return F.softmax(probs), values
+        return npx.softmax(probs), values
 
 net = Policy()
 net.initialize(mx.init.Uniform(0.02))
@@ -74,14 +72,14 @@ for epoch in count(1):
     with autograd.record():
         # Sample a sequence of actions
         for t in range(10000):
-            state = mx.nd.array(np.expand_dims(state, 0))
-            prob, value = net(state)
-            action, logp = mx.nd.sample_multinomial(prob, get_prob=True)
+            state = mx.nd.array(onp.expand_dims(state, 0))
+            prob, value = net(state.as_np_ndarray())
+            action, logp = mx.nd.sample_multinomial(prob.as_nd_ndarray(), get_prob=True)
             state, reward, done, _ = env.step(action.asnumpy()[0])
             if args.render:
                 env.render()
             rewards.append(reward)
-            values.append(value)
+            values.append(value.as_np_ndarray())
             actions.append(action.asnumpy()[0])
             heads.append(logp)
             if done:
@@ -93,12 +91,12 @@ for epoch in count(1):
         for i in range(len(rewards)-1, -1, -1):
             R = rewards[i] + args.gamma * R
             rewards[i] = R
-        rewards = np.array(rewards)
+        rewards = onp.array(rewards)
         rewards -= rewards.mean()
-        rewards /= rewards.std() + np.finfo(rewards.dtype).eps
+        rewards /= rewards.std() + onp.finfo(rewards.dtype).eps
 
         # compute loss and gradient
-        L = sum([loss(value, mx.nd.array([r])) for r, value in zip(rewards, values)])
+        L = sum([loss(value, mx.np.array([r])) for r, value in zip(rewards, values)])
         final_nodes = [L]
         for logp, r, v in zip(heads, rewards, values):
             reward = r - v.asnumpy()[0,0]
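The updated actor-critic script keeps the legacy `mx.nd.sample_multinomial` (presumably because `get_prob=True` has no direct numpy-interface counterpart) and bridges the two front ends with `as_np_ndarray()`/`as_nd_ndarray()`. Below is a minimal sketch of that bridging pattern, assuming MXNet 2.0; the probabilities are made up for illustration:

    import mxnet as mx

    probs = mx.np.array([[0.1, 0.9]])                 # numpy-interface array, e.g. a softmax output
    action, logp = mx.nd.sample_multinomial(
        probs.as_nd_ndarray(), get_prob=True)         # legacy op needs a legacy NDArray view
    logp = logp.as_np_ndarray()                       # convert the result back for numpy-style code
    print(int(action.asnumpy()[0]), float(logp.item()))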
diff --git a/example/gluon/audio/README.md b/example/gluon/audio/README.md
deleted file mode 100644
index 39006e3..0000000
--- a/example/gluon/audio/README.md
+++ /dev/null
@@ -1,115 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Urban Sounds Classification in MXNet Gluon
-
-This example provides an end-to-end pipeline for a common datahack competition - [Urban Sounds Classification Example](https://datahack.analyticsvidhya.com/contest/practice-problem-urban-sound-classification/).
-
-After logging in, the data set can be downloaded.
-The details of the dataset and the link to download it are given below:
-
-
-## Urban Sounds Dataset:
-### Description
-  The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
-  The task is to classify these audio samples into one of the following 10 labels:
-  ```
-  siren,
-  street_music,
-  drilling,
-  dog_bark,
-  children_playing,
-  gun_shot,
-  engine_idling,
-  air_conditioner,
-  jackhammer,
-  car_horn
-  ```
-
-To be able to run this example:
-
-1. `pip install -r requirements.txt`
-
-    If you are in the directory where the requirements.txt file lies,
-    this step installs the required libraries to run the example.
-    The main dependency that is required is: Librosa. 
-    The version used to test the example is: `0.6.2`
-    For more details, refer here:
-https://librosa.github.io/librosa/install.html
-
-2. Download the dataset(train.zip, test.zip) required for this example from the location:
-https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
-
-3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,
-   **Train** and **Test** and two csv files - **train.csv**, **test.csv**
-
-   Assuming you are in a directory *"UrbanSounds"*, after downloading and extracting train.zip, the folder structure should be:
-   
-   ```
-        UrbanSounds        
-                    - Train
-                        - 0.wav, 1.wav ...
-                    - train.csv
-                    - train.py
-                    - predict.py ...
-    ```
-
-4. Apache MXNet is installed on the machine. For instructions, go to the link: https://mxnet.apache.org/install/
-
-
-
-For information on the current design of how the AudioFolderDataset is implemented, refer below:
-https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio
-
-### Usage 
-
-For training:
-
-- Arguments
-  - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train"
-  - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv"
-  - epochs : Number of epochs to train the model. Default = 30
-  - batch_size : The batch size for training. Default = 32
-
-
-###### To use the default arguments, use:
-```
-python train.py
-``` 
-or
-
-###### To pass command-line arguments for training data directory, epochs, batch_size, csv file name, use :
-```
-python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 
-```
-
-For prediction:
-
-- Arguments
-  - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test"
-
-
-###### To use the default arguments, use:
-```
-python predict.py
-``` 
-or
-
-###### To pass command-line arguments for test data directory, use :
-```
-python predict.py --pred ./Test
-```
diff --git a/example/gluon/audio/transforms.py b/example/gluon/audio/transforms.py
deleted file mode 100644
index 8b76d13..0000000
--- a/example/gluon/audio/transforms.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# coding: utf-8
-# pylint: disable= arguments-differ
-"""Audio transforms."""
-
-import warnings
-import numpy as np
-try:
-    import librosa
-except ImportError as e:
-    warnings.warn("librosa dependency could not be resolved or \
-    imported, could not provide some/all transform.")
-
-from mxnet import ndarray as nd
-from mxnet.gluon.block import Block
-
-class MFCC(Block):
-    """Extracts Mel frequency cepstrum coefficients from the audio data file
-    More details : https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html
-
-    Attributes
-    ----------
-    sampling_rate: int, default 22050
-        sampling rate of the input audio signal
-    num_mfcc: int, default 20
-        number of mfccs to return
-
-
-    Inputs:
-        - **x**: input tensor (samples, ) shape.
-
-    Outputs:
-        - **out**: output array is a scaled NDArray with (samples, ) shape.
-
-    """
-
-    def __init__(self, sampling_rate=22050, num_mfcc=20):
-        self._sampling_rate = sampling_rate
-        self._num_fcc = num_mfcc
-        super(MFCC, self).__init__()
-
-    def forward(self, x):
-        if isinstance(x, np.ndarray):
-            y = x
-        elif isinstance(x, nd.NDArray):
-            y = x.asnumpy()
-        else:
-            warnings.warn("MFCC - allowed datatypes mx.nd.NDArray and numpy.ndarray")
-            return x
-
-        audio_tmp = np.mean(librosa.feature.mfcc(y=y, sr=self._sampling_rate, n_mfcc=self._num_fcc).T, axis=0)
-        return nd.array(audio_tmp)
-
-
-class Scale(Block):
-    """Scale audio numpy.ndarray from a 16-bit integer to a floating point number between
-    -1.0 and 1.0. The 16-bit integer is the sample resolution or bit depth.
-
-    Attributes
-    ----------
-    scale_factor : float
-        The factor to scale the input tensor by.
-
-
-    Inputs:
-        - **x**: input tensor (samples, ) shape.
-
-    Outputs:
-        - **out**: output array is a scaled NDArray with (samples, ) shape.
-
-    Examples
-    --------
-    >>> scale = audio.transforms.Scale(scale_factor=2)
-    >>> audio_samples = mx.nd.array([2,3,4])
-    >>> scale(audio_samples)
-    [1.  1.5 2. ]
-    <NDArray 3 @cpu(0)>
-
-    """
-
-    def __init__(self, scale_factor=2**31):
-        self.scale_factor = scale_factor
-        super(Scale, self).__init__()
-
-    def forward(self, x):
-        if self.scale_factor == 0:
-            warnings.warn("Scale factor cannot be 0.")
-            return x
-        if isinstance(x, np.ndarray):
-            return nd.array(x/self.scale_factor)
-        return x / self.scale_factor
-
-
-class PadTrim(Block):
-    """Pad/Trim a 1d-NDArray of NPArray (Signal or Labels)
-
-    Attributes
-    ----------
-    max_len : int
-        Length to which the array will be padded or trimmed to.
-    fill_value: int or float
-        If there is a need of padding, what value to pad at the end of the input array.
-
-
-    Inputs:
-        - **x**: input tensor (samples, ) shape.
-
-    Outputs:
-        - **out**: output array is a scaled NDArray with (max_len, ) shape.
-
-    Examples
-    --------
-    >>> padtrim = audio.transforms.PadTrim(max_len=9, fill_value=0)
-    >>> audio_samples = mx.nd.array([1,2,3,4,5])
-    >>> padtrim(audio_samples)
-    [1. 2. 3. 4. 5. 0. 0. 0. 0.]
-    <NDArray 9 @cpu(0)>
-
-    """
-
-    def __init__(self, max_len, fill_value=0):
-        self._max_len = max_len
-        self._fill_value = fill_value
-        super(PadTrim, self).__init__()
-
-    def forward(self, x):
-        if  isinstance(x, np.ndarray):
-            x = nd.array(x)
-        if self._max_len > x.size:
-            pad = nd.ones((self._max_len - x.size,)) * self._fill_value
-            x = nd.concat(x, pad, dim=0)
-        elif self._max_len < x.size:
-            x = x[:self._max_len]
-        return x
-
-
-class MEL(Block):
-    """Create MEL Spectrograms from a raw audio signal. Relatively pretty slow.
-
-    Attributes
-    ----------
-    sampling_rate: int, default 22050
-        sampling rate of the input audio signal
-    num_fft: int, default 2048
-        length of the Fast Fourier transform window
-    num_mels: int, default 20
-        number of mel bands to generate
-    hop_length: int, default 512
-        total samples between successive frames
-
-
-    Inputs:
-        - **x**: input tensor (samples, ) shape.
-
-    Outputs:
-        - **out**: output array which consists of mel spectograms, shape = (n_mels, 1)
-
-       Usage (see librosa.feature.melspectrogram docs):
-           MEL(sr=16000, n_fft=1600, hop_length=800, n_mels=64)
-
-    Examples
-    --------
-    >>> mel = audio.transforms.MEL()
-    >>> audio_samples = mx.nd.array([1,2,3,4,5])
-    >>> mel(audio_samples)
-    [[3.81801406e+04]
-    [9.86858240e-29]
-    [1.87405472e-29]
-    [2.38637225e-29]
-    [3.94043010e-29]
-    [3.67071565e-29]
-    [7.29390295e-29]
-    [8.84324438e-30]...
-    <NDArray 128x1 @cpu(0)>
-
-    """
-
-    def __init__(self, sampling_rate=22050, num_fft=2048, num_mels=20, hop_length=512):
-        self._sampling_rate = sampling_rate
-        self._num_fft = num_fft
-        self._num_mels = num_mels
-        self._hop_length = hop_length
-        super(MEL, self).__init__()
-
-    def forward(self, x):
-        if isinstance(x, nd.NDArray):
-            x = x.asnumpy()
-        specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\
-        n_fft=self._num_fft, n_mels=self._num_mels, hop_length=self._hop_length)
-        return nd.array(specs)
diff --git a/example/gluon/audio/urban_sounds/README.md b/example/gluon/audio/urban_sounds/README.md
deleted file mode 100644
index 4ad76ff..0000000
--- a/example/gluon/audio/urban_sounds/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Urban Sounds Classification in MXNet Gluon
-
-This example provides an end-to-end pipeline for a common datahack competition - Urban Sounds Classification Example.
-Below is the link to the competition:
-https://datahack.analyticsvidhya.com/contest/practice-problem-urban-sound-classification/
-
-After logging in, the data set can be downloaded.
-The details of the dataset and the link to download it are given below:
-
-
-## Urban Sounds Dataset:
-### Description
-  The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
-  The task is to classify these audio samples into one of the following 10 labels:
-  ```
-  siren,
-  street_music,
-  drilling,
-  dog_bark,
-  children_playing,
-  gun_shot,
-  engine_idling,
-  air_conditioner,
-  jackhammer,
-  car_horn
-  ```
-
-To be able to run this example:
-
-1. `pip install -r requirements.txt`
-
-    If you are in the directory where the requirements.txt file lies,
-    this step installs the required libraries to run the example.
-    The main dependency that is required is: Librosa. 
-    The version used to test the example is: `0.6.2`
-    For more details, refer here:
-https://librosa.github.io/librosa/install.html
-
-2. Download the dataset(train.zip, test.zip) required for this example from the location:
-https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
-
-3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,
-   **Train** and **Test** and two csv files - **train.csv**, **test.csv**
-
-   Assuming you are in a directory *"UrbanSounds"*, after downloading and extracting train.zip, the folder structure should be:
-   
-   ```
-        UrbanSounds        
-                    - Train
-                        - 0.wav, 1.wav ...
-                    - train.csv
-                    - train.py
-                    - predict.py ...
-    ```
-
-4. Apache MXNet is installed on the machine. For instructions, go to the link: https://mxnet.apache.org/install/
-
-
-
-For information on the current design of how the AudioFolderDataset is implemented, refer below:
-https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio
-
-### Usage 
-
-For training:
-
-- Arguments
-  - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train"
-  - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv"
-  - epochs : Number of epochs to train the model. Default = 30
-  - batch_size : The batch size for training. Default = 32
-
-
-###### To use the default arguments, use:
-```
-python train.py
-``` 
-or
-
-###### To pass command-line arguments for training data directory, epochs, batch_size, csv file name, use :
-```
-python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 
-```
-
-For prediction:
-
-- Arguments
-  - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test"
-
-
-###### To use the default arguments, use:
-```
-python predict.py
-``` 
-or
-
-###### To pass command-line arguments for test data directory, use :
-```
-python predict.py --pred ./Test
-```
\ No newline at end of file
diff --git a/example/gluon/audio/urban_sounds/datasets.py b/example/gluon/audio/urban_sounds/datasets.py
deleted file mode 100644
index 51c040c..0000000
--- a/example/gluon/audio/urban_sounds/datasets.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# coding: utf-8
-# pylint: disable=
-""" Audio Dataset container."""
-from __future__ import print_function
-__all__ = ['AudioFolderDataset']
-
-import os
-import warnings
-from itertools import islice
-import csv
-from mxnet.gluon.data import Dataset
-from mxnet import ndarray as nd
-try:
-    import librosa
-except ImportError as e:
-    raise ImportError("librosa dependency could not be resolved or \
-    imported, could not load audio onto the numpy array. pip install librosa")
-
-
-
-class AudioFolderDataset(Dataset):
-    """A dataset for loading Audio files stored in a folder structure like::
-
-        root/children_playing/0.wav
-        root/siren/23.wav
-        root/drilling/26.wav
-        root/dog_barking/42.wav
-            OR
-        Files(wav) and a csv file that has file name and associated label
-
-    Parameters
-    ----------
-    root : str
-        Path to root directory.
-    transform : callable, default None
-        A function that takes data and label and transforms them
-    train_csv: str, default None
-       train_csv should be populated by the training csv filename
-    file_format: str, default '.wav'
-        The format of the audio files(.wav)
-    skip_header: boolean, default False
-        While reading from csv file, whether to skip at the start of the file to avoid reading in header
-
-
-    Attributes
-    ----------
-    synsets : list
-        List of class names. `synsets[i]` is the name for the  `i`th label
-    items : list of tuples
-        List of all audio in (filename, label) pairs.
-
-    """
-    def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False):
-        if not librosa:
-            warnings.warn("pip install librosa to continue.")
-            raise RuntimeError("Librosa not installed. Run pip install librosa and retry this step.")
-        self._root = os.path.expanduser(root)
-        self._exts = ['.wav']
-        self._format = file_format
-        self._train_csv = train_csv
-        if file_format.lower() not in self._exts:
-            raise RuntimeError("Format {} not supported currently.".format(file_format))
-        skip_rows = 0
-        if skip_header:
-            skip_rows = 1
-        self._list_audio_files(self._root, skip_rows=skip_rows)
-
-
-    def _list_audio_files(self, root, skip_rows=0):
-        """Populates synsets - a map of index to label for the data items.
-        Populates the data in the dataset, making tuples of (data, label)
-        """
-        self.synsets = []
-        self.items = []
-        if not self._train_csv:
-            # The audio files are organized in folder structure with
-            # directory name as label and audios in them
-            self._folder_structure(root)
-        else:
-            # train_csv contains mapping between filename and label
-            self._csv_labelled_dataset(root, skip_rows=skip_rows)
-
-        # Generating the synset.txt file now
-        if not os.path.exists("./synset.txt"):
-            with open("./synset.txt", "w") as synsets_file:
-                for item in self.synsets:
-                    synsets_file.write(item+os.linesep)
-            print("Synsets is generated as synset.txt")
-        else:
-            warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.")
-
-
-    def _folder_structure(self, root):
-        for folder in sorted(os.listdir(root)):
-            path = os.path.join(root, folder)
-            if not os.path.isdir(path):
-                warnings.warn('Ignoring {}, which is not a directory.'.format(path))
-                continue
-            label = len(self.synsets)
-            self.synsets.append(folder)
-            for filename in sorted(os.listdir(path)):
-                file_name = os.path.join(path, filename)
-                ext = os.path.splitext(file_name)[1]
-                if ext.lower() not in self._exts:
-                    warnings.warn('Ignoring {} of type {}. Only support {}'\
-                    .format(filename, ext, ', '.join(self._exts)))
-                    continue
-                self.items.append((file_name, label))
-
-
-    def _csv_labelled_dataset(self, root, skip_rows=0):
-        with open(self._train_csv, "r") as traincsv:
-            for line in islice(csv.reader(traincsv), skip_rows, None):
-                filename = os.path.join(root, line[0])
-                label = line[1].strip()
-                if label not in self.synsets:
-                    self.synsets.append(label)
-                if self._format not in filename:
-                    filename = filename+self._format
-                self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,))))
-
-
-    def __getitem__(self, idx):
-        """Retrieve the item (data, label) stored at idx in items"""
-        filename, label = self.items[idx]
-        # resampling_type is passed as kaiser_fast for a better performance
-        X1, _ = librosa.load(filename, res_type='kaiser_fast')
-        return nd.array(X1), label
-
-
-    def __len__(self):
-        """Retrieves the number of items in the dataset"""
-        return len(self.items)
-
-
-    def transform_first(self, fn, lazy=False):
-        """Returns a new dataset with the first element of each sample
-        transformed by the transformer function `fn`.
-
-        This is useful, for example, when you only want to transform data
-        while keeping label as is.
-        lazy=False is passed to transform_first for dataset so that all tramsforms could be performed in
-        one shot and not during training. This is a performance consideration.
-
-        Parameters
-        ----------
-        fn : callable
-            A transformer function that takes the first element of a sample
-            as input and returns the transformed element.
-        lazy : bool, default False
-            If False, transforms all samples at once. Otherwise,
-            transforms each sample on demand. Note that if `fn`
-            is stochastic, you must set lazy to True or you will
-            get the same result on all epochs.
-
-        Returns
-        -------
-        Dataset
-            The transformed dataset.
-
-        """
-        return super(AudioFolderDataset, self).transform_first(fn, lazy=lazy)
diff --git a/example/gluon/audio/urban_sounds/model.py b/example/gluon/audio/urban_sounds/model.py
deleted file mode 100644
index af23cb9..0000000
--- a/example/gluon/audio/urban_sounds/model.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""This module builds a model an MLP with a configurable output layer( number of units in the last layer).
-Users can pass any number of units in the last layer. SInce this dataset has 10 labels,
-the default value of num_labels = 10
-"""
-import mxnet as mx
-from mxnet import gluon
-
-# Defining a neural network with number of labels
-def get_net(num_labels=10):
-    net = gluon.nn.Sequential()
-    with net.name_scope():
-        net.add(gluon.nn.Dense(256, activation="relu")) # 1st layer (256 nodes)
-        net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer ( 256 nodes )
-        net.add(gluon.nn.Dense(num_labels))
-    net.collect_params().initialize(mx.init.Xavier())
-    return net
diff --git a/example/gluon/audio/urban_sounds/predict.py b/example/gluon/audio/urban_sounds/predict.py
deleted file mode 100644
index 0c36311..0000000
--- a/example/gluon/audio/urban_sounds/predict.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-""" Prediction module for Urban Sounds Classification"""
-from __future__ import print_function
-import os
-import sys
-import warnings
-import mxnet as mx
-from mxnet import nd
-from model import get_net
-try:
-    import librosa
-except ImportError:
-    raise ImportError("Librosa is not installed! please run the following command:\
-     `pip install librosa`")
-sys.path.append('../')
-
-def predict(prediction_dir='./Test'):
-    """The function is used to run predictions on the audio files in the directory `pred_directory`.
-
-    Parameters
-    ----------
-    net:
-        The model that has been trained.
-    prediction_dir: string, default ./Test
-        The directory that contains the audio files on which predictions are to be made
-
-    """
-
-    if not os.path.exists(prediction_dir):
-        warnings.warn("The directory on which predictions are to be made is not found!")
-        return
-
-    if len(os.listdir(prediction_dir)) == 0:
-        warnings.warn("The directory on which predictions are to be made is empty! Exiting...")
-        return
-
-    # Loading synsets
-    if not os.path.exists('./synset.txt'):
-        warnings.warn("The synset or labels for the dataset do not exist. Please run the training script first.")
-        return
-
-    with open("./synset.txt", "r") as f:
-        synset = [l.rstrip() for l in f]
-    net = get_net(len(synset))
-    print("Trying to load the model with the saved parameters...")
-    if not os.path.exists("./net.params"):
-        warnings.warn("The model does not have any saved parameters... Cannot proceed! Train the model first")
-        return
-
-    net.load_parameters("./net.params")
-    file_names = os.listdir(prediction_dir)
-    full_file_names = [os.path.join(prediction_dir, item) for item in file_names]
-    from transforms import MFCC
-    mfcc = MFCC()
-    print("\nStarting predictions for audio files in ", prediction_dir, " ....\n")
-    for filename in full_file_names:
-        # Argument kaiser_fast to res_type is faster than 'kaiser_best'. To reduce the load time, passing kaiser_fast.
-        X1, _ = librosa.load(filename, res_type='kaiser_fast')
-        transformed_test_data = mfcc(mx.nd.array(X1))
-        output = net(transformed_test_data.reshape((1, -1)))
-        prediction = nd.argmax(output, axis=1)
-        print(filename, " -> ", synset[(int)(prediction.asscalar())])
-
-
-if __name__ == '__main__':
-    try:
-        import argparse
-        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet")
-        parser.add_argument('--pred', '-p', help="Enter the folder path that contains your audio files", type=str)
-        args = parser.parse_args()
-        pred_dir = args.pred
-
-    except ImportError:
-        warnings.warn("Argparse module not installed! passing default arguments.")
-        pred_dir = './Test'
-    predict(prediction_dir=pred_dir)
-    print("Urban sounds classification Prediction DONE!")
diff --git a/example/gluon/audio/urban_sounds/requirements.txt b/example/gluon/audio/urban_sounds/requirements.txt
deleted file mode 100644
index d885e0b..0000000
--- a/example/gluon/audio/urban_sounds/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-librosa>=0.6.2 # librosa is a library that is used to load the audio(wav) files and provides capabilities of feature extraction.
-argparse # used for parsing arguments
\ No newline at end of file
diff --git a/example/gluon/audio/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py
deleted file mode 100644
index 8a55c5b..0000000
--- a/example/gluon/audio/urban_sounds/train.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""The module to run training on the Urban sounds dataset"""
-from __future__ import print_function
-import sys
-import os
-import time
-import warnings
-import mxnet as mx
-from mxnet import gluon, nd, autograd
-from datasets import AudioFolderDataset
-import model
-sys.path.append('../')
-
-def evaluate_accuracy(data_iterator, net):
-    """Function to evaluate accuracy of any data iterator passed to it as an argument"""
-    acc = mx.gluon.metric.Accuracy()
-    for data, label in data_iterator:
-        output = net(data)
-        predictions = nd.argmax(output, axis=1)
-        predictions = predictions.reshape((-1, 1))
-        acc.update(preds=predictions, labels=label)
-    return acc.get()[1]
-
-
-def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
-    """Function responsible for running the training the model."""
-
-    if not train_dir or not os.path.exists(train_dir) or not train_csv:
-        warnings.warn("No train directory could be found ")
-        return
-    # Make a dataset from the local folder containing Audio data
-    print("\nMaking an Audio Dataset...\n")
-    tick = time.time()
-    aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_header=True)
-    tock = time.time()
-
-    print("Loading the dataset took ", (tock-tick), " seconds.")
-    print("\n=======================================\n")
-    print("Number of output classes = ", len(aud_dataset.synsets))
-    print("\nThe labels are : \n")
-    print(aud_dataset.synsets)
-    # Get the model to train
-    net = model.get_net(len(aud_dataset.synsets))
-    print("\nNeural Network = \n")
-    print(net)
-    print("\nModel - Neural Network Generated!\n")
-    print("=======================================\n")
-
-    #Define the loss - Softmax CE Loss
-    softmax_loss = gluon.loss.SoftmaxCELoss(from_logits=False, sparse_label=True)
-    print("Loss function initialized!\n")
-    print("=======================================\n")
-
-    #Define the trainer with the optimizer
-    trainer = gluon.Trainer(net.collect_params(), 'adadelta')
-    print("Optimizer - Trainer function initialized!\n")
-    print("=======================================\n")
-    print("Loading the dataset to the Gluon's OOTB Dataloader...")
-
-    #Getting the data loader out of the AudioDataset and passing the transform
-    from transforms import MFCC
-    aud_transform = MFCC()
-    tick = time.time()
-
-    audio_train_loader = gluon.data.DataLoader(aud_dataset.transform_first(aud_transform), batch_size=32, shuffle=True)
-    tock = time.time()
-    print("Time taken to load data and apply transform here is ", (tock-tick), " seconds.")
-    print("=======================================\n")
-
-
-    print("Starting the training....\n")
-    # Training loop
-    tick = time.time()
-    batch_size = batch_size
-    num_examples = len(aud_dataset)
-
-    for epoch in range(epochs):
-        cumulative_loss = 0
-        for data, label in audio_train_loader:
-            with autograd.record():
-                output = net(data)
-                loss = softmax_loss(output, label)
-            loss.backward()
-
-            trainer.step(batch_size)
-            cumulative_loss += mx.nd.sum(loss).asscalar()
-
-        if epoch%5 == 0:
-            train_accuracy = evaluate_accuracy(audio_train_loader, net)
-            print("Epoch {}. Loss: {} Train accuracy : {} ".format(epoch, cumulative_loss/num_examples, train_accuracy))
-            print("\n------------------------------\n")
-
-    train_accuracy = evaluate_accuracy(audio_train_loader, net)
-    tock = time.time()
-    print("\nFinal training accuracy: ", train_accuracy)
-
-    print("Training the sound classification for ", epochs, " epochs, MLP model took ", (tock-tick), " seconds")
-    print("====================== END ======================\n")
-
-    print("Trying to save the model parameters here...")
-    net.save_parameters("./net.params")
-    print("Saved the model parameters in current directory.")
-
-
-if __name__ == '__main__':
-    training_dir = './Train'
-    training_csv = './train.csv'
-    epochs = 30
-    batch_size = 32
-
-    try:
-        import argparse
-        parser = argparse.ArgumentParser(description="Urban Sounds classification example - MXNet Gluon")
-        parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str)
-        parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\
-        to label mapping", type=str)
-        parser.add_argument('--epochs', '-e', help="Enter the number of epochs \
-        you would want to run the training for.", type=int)
-        parser.add_argument('--batch_size', '-b', help="Enter the batch_size of data", type=int)
-        args = parser.parse_args()
-
-        if args:
-            if args.train:
-                training_dir = args.train
-
-            if args.csv:
-                training_csv = args.csv
-
-            if args.epochs:
-                epochs = args.epochs
-
-            if args.batch_size:
-                batch_size = args.batch_size
-
-
-    except ImportError as er:
-        warnings.warn("Argument parsing module could not be imported \
-        Passing default arguments.")
-
-
-    train(train_dir=training_dir, train_csv=training_csv, epochs=epochs, batch_size=batch_size)
-    print("Urban sounds classification Training DONE!")
diff --git a/example/gluon/data.py b/example/gluon/data.py
index 7d0f882..7769f60 100644
--- a/example/gluon/data.py
+++ b/example/gluon/data.py
@@ -174,7 +174,7 @@ class ImagePairIter(mx.io.DataIter):
                 image = Image.open(fn).convert('YCbCr').split()[0]
                 if image.size[0] > image.size[1]:
                     image = image.transpose(Image.TRANSPOSE)
-                image = mx.nd.expand_dims(mx.nd.array(image), axis=2)
+                image = mx.np.expand_dims(mx.np.array(image), axis=2)
                 target = image.copy()
                 for aug in self.input_aug:
                     image = aug(image)
@@ -183,10 +183,10 @@ class ImagePairIter(mx.io.DataIter):
                 data.append(image)
                 label.append(target)
 
-            data = mx.nd.concat(*[mx.nd.expand_dims(d, axis=0) for d in data], dim=0)
-            label = mx.nd.concat(*[mx.nd.expand_dims(d, axis=0) for d in label], dim=0)
-            data = [mx.nd.transpose(data, axes=(0, 3, 1, 2)).astype('float32')/255]
-            label = [mx.nd.transpose(label, axes=(0, 3, 1, 2)).astype('float32')/255]
+            data = mx.np.concatenate([mx.np.expand_dims(d, axis=0) for d in data], axis=0)
+            label = mx.np.concatenate([mx.np.expand_dims(d, axis=0) for d in label], axis=0)
+            data = [mx.np.transpose(data, axes=(0, 3, 1, 2)).astype('float32')/255]
+            label = [mx.np.transpose(label, axes=(0, 3, 1, 2)).astype('float32')/255]
 
             return mx.io.DataBatch(data=data, label=label)
         else:
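The data.py hunk above replaces the legacy batching calls with their numpy-interface counterparts. A small sketch of that equivalence, assuming MXNet 2.0; the image sizes are made up for illustration:

    import mxnet as mx

    imgs = [mx.np.zeros((64, 64, 1)) for _ in range(4)]   # four HWC single-channel images
    batch = mx.np.concatenate([mx.np.expand_dims(d, axis=0) for d in imgs], axis=0)
    batch = mx.np.transpose(batch, axes=(0, 3, 1, 2)).astype('float32') / 255
    print(batch.shape)                                     # (4, 1, 64, 64), i.e. NCHW as before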
diff --git a/example/gluon/dc_gan/README.md b/example/gluon/dc_gan/README.md
deleted file mode 100644
index fd41d19..0000000
--- a/example/gluon/dc_gan/README.md
+++ /dev/null
@@ -1,69 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# DCGAN in MXNet
-
-[Deep Convolutional Generative Adversarial Networks(DCGAN)](https://arxiv.org/abs/1511.06434) implementation with Apache MXNet GLUON.
-This implementation uses [inception_score](https://github.com/openai/improved-gan) to evaluate the model.
-
-You can use this reference implementation on the MNIST and CIFAR-10 datasets.
-
-
-#### Generated image output examples from the CIFAR-10 dataset
-![Generated image output examples from the CIFAR-10 dataset](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/gluon/DCGAN/fake_img_iter_13900.png)
-
-#### Generated image output examples from the MNIST dataset
-![Generated image output examples from the MNIST dataset](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/gluon/DCGAN/fake_img_iter_21700.png)
-
-#### inception_score in cpu and gpu (the real image`s score is around 3.3)
-CPU & GPU
-
-![inception score with CPU](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/gluon/DCGAN/inception_score_cifar10_cpu.png)
-![inception score with GPU](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/gluon/DCGAN/inception_score_cifar10.png)
-
-## Quick start
-Use the following code to see the configurations you can set:
-```bash
-python dcgan.py -h
-```
-    
-
-    optional arguments:
-      -h, --help            show this help message and exit
-      --dataset DATASET     dataset to use. options are cifar10 and mnist.
-      --batch-size BATCH_SIZE  input batch size, default is 64
-      --nz NZ               size of the latent z vector, default is 100
-      --ngf NGF             the channel of each generator filter layer, default is 64.
-      --ndf NDF             the channel of each descriminator filter layer, default is 64.
-      --nepoch NEPOCH       number of epochs to train for, default is 25.
-      --niter NITER         save generated images and inception_score per niter iters, default is 100.
-      --lr LR               learning rate, default=0.0002
-      --beta1 BETA1         beta1 for adam. default=0.5
-      --cuda                enables cuda
-      --netG NETG           path to netG (to continue training)
-      --netD NETD           path to netD (to continue training)
-      --outf OUTF           folder to output images and model checkpoints
-      --check-point CHECK_POINT
-                            save results at each epoch or not
-      --inception_score INCEPTION_SCORE
-                            To record the inception_score, default is True.
-
-
-Use the following Python script to train a DCGAN model with default configurations using the CIFAR-10 dataset and record metrics with `inception_score`:
-```bash
-python dcgan.py
-```
diff --git a/example/gluon/dc_gan/__init__.py b/example/gluon/dc_gan/__init__.py
deleted file mode 100644
index 26fa2ce..0000000
--- a/example/gluon/dc_gan/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-
-#   http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/example/gluon/dc_gan/dcgan.py b/example/gluon/dc_gan/dcgan.py
deleted file mode 100644
index d7c36a0..0000000
--- a/example/gluon/dc_gan/dcgan.py
+++ /dev/null
@@ -1,355 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Generate MXNet implementation of Deep Convolutional Generative Adversarial Networks"""
-
-import logging
-from datetime import datetime
-import argparse
-import os
-import time
-import numpy as np
-from matplotlib import pyplot as plt
-import matplotlib as mpl
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon import nn
-from mxnet import autograd
-from inception_score import get_inception_score
-
-mpl.use('Agg')
-
-
-def fill_buf(buf, i, img, shape):
-    """Reposition the images generated by the generator so that it can be saved as picture matrix.
-    :param buf: the images metric
-    :param i: index of each image
-    :param img: images generated by generator once
-    :param shape: each image`s shape
-    :return: Adjust images for output
-    """
-    n = buf.shape[0]//shape[1]
-    m = buf.shape[1]//shape[0]
-
-    sx = (i%m)*shape[0]
-    sy = (i//m)*shape[1]
-    buf[sy:sy+shape[1], sx:sx+shape[0], :] = img
-
-
-def visual(title, X, name):
-    """Image visualization and preservation
-    :param title: title
-    :param X: images to visualized
-    :param name: saved picture`s name
-    :return:
-    """
-    assert len(X.shape) == 4
-    X = X.transpose((0, 2, 3, 1))
-    X = np.clip((X - np.min(X))*(255.0/(np.max(X) - np.min(X))), 0, 255).astype(np.uint8)
-    n = np.ceil(np.sqrt(X.shape[0]))
-    buff = np.zeros((int(n*X.shape[1]), int(n*X.shape[2]), int(X.shape[3])), dtype=np.uint8)
-    for i, img in enumerate(X):
-        fill_buf(buff, i, img, X.shape[1:3])
-    buff = buff[:, :, ::-1]
-    plt.imshow(buff)
-    plt.title(title)
-    plt.savefig(name)
-
-
-parser = argparse.ArgumentParser(description='Train a DCGAN model for image generation '
-                                             'and then use the inception score to evaluate the result.')
-parser.add_argument('--dataset', type=str, default='cifar10', help='dataset to use. options are cifar10 and mnist.')
-parser.add_argument('--batch-size', type=int, default=64, help='input batch size, default is 64')
-parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector, default is 100')
-parser.add_argument('--ngf', type=int, default=64, help='number of channels in each generator filter layer, default is 64.')
-parser.add_argument('--ndf', type=int, default=64, help='number of channels in each discriminator filter layer, '
-                                                        'default is 64.')
-parser.add_argument('--nepoch', type=int, default=25, help='number of epochs to train for, default is 25.')
-parser.add_argument('--niter', type=int, default=10, help='save generated images and the inception_score every niter iterations, '
-                                                          'default is 10.')
-parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
-parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
-parser.add_argument('--cuda', action='store_true', help='enables cuda')
-parser.add_argument('--netG', default='', help="path to netG (to continue training)")
-parser.add_argument('--netD', default='', help="path to netD (to continue training)")
-parser.add_argument('--outf', default='./results', help='folder to output images and model checkpoints')
-parser.add_argument('--check-point', default=True, help="save results at each epoch or not")
-parser.add_argument('--inception_score', type=bool, default=True, help='To record the inception_score, '
-                                                                       'default is True.')
-
-opt = parser.parse_args()
-print(opt)
-
-logging.basicConfig(level=logging.DEBUG)
-
-nz = int(opt.nz)
-ngf = int(opt.ngf)
-ndf = int(opt.ndf)
-niter = opt.niter
-nc = 3
-if opt.cuda:
-    ctx = mx.gpu(0)
-else:
-    ctx = mx.cpu()
-batch_size = opt.batch_size
-check_point = bool(opt.check_point)
-outf = opt.outf
-dataset = opt.dataset
-
-if not os.path.exists(outf):
-    os.makedirs(outf)
-
-
-def transformer(data, label):
-    """Get the translation of images"""
-    # resize to 64x64
-    data = mx.image.imresize(data, 64, 64)
-    # transpose from (64, 64, 3) to (3, 64, 64)
-    data = mx.nd.transpose(data, (2, 0, 1))
-    # normalize to [-1, 1]
-    data = data.astype(np.float32)/128 - 1
-    # if image is greyscale, repeat 3 times to get RGB image.
-    if data.shape[0] == 1:
-        data = mx.nd.tile(data, (3, 1, 1))
-    return data, label
-
-
-def get_dataset(dataset_name):
-    """Load the dataset and return DataLoaders that yield batch_size examples at a time.
-
-    :param dataset_name: string, either 'mnist' or 'cifar10'
-
-    Returns:
-    train_data: DataLoader
-        training dataset
-    val_data: DataLoader
-        validation dataset
-    """
-    # mnist
-    if dataset_name == "mnist":
-        train_data = gluon.data.DataLoader(
-            gluon.data.vision.MNIST('./data', train=True).transform(transformer),
-            batch_size, shuffle=True, last_batch='discard')
-
-        val_data = gluon.data.DataLoader(
-            gluon.data.vision.MNIST('./data', train=False).transform(transformer),
-            batch_size, shuffle=False)
-    # cifar10
-    elif dataset_name == "cifar10":
-        train_data = gluon.data.DataLoader(
-            gluon.data.vision.CIFAR10('./data', train=True).transform(transformer),
-            batch_size, shuffle=True, last_batch='discard')
-
-        val_data = gluon.data.DataLoader(
-            gluon.data.vision.CIFAR10('./data', train=False).transform(transformer),
-            batch_size, shuffle=False)
-
-    return train_data, val_data
-
-
-def get_netG():
-    """Get net G"""
-    # build the generator
-    netG = nn.Sequential()
-    with netG.name_scope():
-        # input is Z, going into a convolution
-        netG.add(nn.Conv2DTranspose(ngf * 8, 4, 1, 0, use_bias=False))
-        netG.add(nn.BatchNorm())
-        netG.add(nn.Activation('relu'))
-        # state size. (ngf*8) x 4 x 4
-        netG.add(nn.Conv2DTranspose(ngf * 4, 4, 2, 1, use_bias=False))
-        netG.add(nn.BatchNorm())
-        netG.add(nn.Activation('relu'))
-        # state size. (ngf*4) x 8 x 8
-        netG.add(nn.Conv2DTranspose(ngf * 2, 4, 2, 1, use_bias=False))
-        netG.add(nn.BatchNorm())
-        netG.add(nn.Activation('relu'))
-        # state size. (ngf*2) x 16 x 16
-        netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, use_bias=False))
-        netG.add(nn.BatchNorm())
-        netG.add(nn.Activation('relu'))
-        # state size. (ngf) x 32 x 32
-        netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, use_bias=False))
-        netG.add(nn.Activation('tanh'))
-        # state size. (nc) x 64 x 64
-
-    return netG
-
-
-def get_netD():
-    """Get the netD"""
-    # build the discriminator
-    netD = nn.Sequential()
-    with netD.name_scope():
-        # input is (nc) x 64 x 64
-        netD.add(nn.Conv2D(ndf, 4, 2, 1, use_bias=False))
-        netD.add(nn.LeakyReLU(0.2))
-        # state size. (ndf) x 32 x 32
-        netD.add(nn.Conv2D(ndf * 2, 4, 2, 1, use_bias=False))
-        netD.add(nn.BatchNorm())
-        netD.add(nn.LeakyReLU(0.2))
-        # state size. (ndf*2) x 16 x 16
-        netD.add(nn.Conv2D(ndf * 4, 4, 2, 1, use_bias=False))
-        netD.add(nn.BatchNorm())
-        netD.add(nn.LeakyReLU(0.2))
-        # state size. (ndf*4) x 8 x 8
-        netD.add(nn.Conv2D(ndf * 8, 4, 2, 1, use_bias=False))
-        netD.add(nn.BatchNorm())
-        netD.add(nn.LeakyReLU(0.2))
-        # state size. (ndf*8) x 4 x 4
-        netD.add(nn.Conv2D(2, 4, 1, 0, use_bias=False))
-        # state size. 2 x 1 x 1
-
-    return netD
-
-
-def get_configurations(netG, netD):
-    """Get configurations for net"""
-    # loss
-    loss = gluon.loss.SoftmaxCrossEntropyLoss()
-
-    # initialize the generator and the discriminator
-    netG.initialize(mx.init.Normal(0.02), ctx=ctx)
-    netD.initialize(mx.init.Normal(0.02), ctx=ctx)
-
-    # trainer for the generator and the discriminator
-    trainerG = gluon.Trainer(netG.collect_params(), 'adam', {'learning_rate': opt.lr, 'beta1': opt.beta1})
-    trainerD = gluon.Trainer(netD.collect_params(), 'adam', {'learning_rate': opt.lr, 'beta1': opt.beta1})
-
-    return loss, trainerG, trainerD
-
-
-def ins_save(inception_score):
-    # draw the inception_score curve
-    length = len(inception_score)
-    x = np.arange(0, length)
-    plt.figure(figsize=(8.0, 6.0))
-    plt.plot(x, inception_score)
-    plt.xlabel("iter/100")
-    plt.ylabel("inception_score")
-    plt.savefig("inception_score.png")
-
-
-# main function
-def main():
-    """Entry point to dcgan"""
-    print("|------- new changes!!!!!!!!!")
-    # to get the dataset and net configuration
-    train_data, val_data = get_dataset(dataset)
-    netG = get_netG()
-    netD = get_netD()
-    loss, trainerG, trainerD = get_configurations(netG, netD)
-
-    # set labels
-    real_label = mx.nd.ones((opt.batch_size,), ctx=ctx)
-    fake_label = mx.nd.zeros((opt.batch_size,), ctx=ctx)
-
-    metric = mx.gluon.metric.Accuracy()
-    print('Training... ')
-    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
-
-    iter = 0
-
-    # to metric the network
-    loss_d = []
-    loss_g = []
-    inception_score = []
-
-    for epoch in range(opt.nepoch):
-        tic = time.time()
-        btic = time.time()
-        for data, _ in train_data:
-            ############################
-            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
-            ###########################
-            # train with real_t
-            data = data.as_in_context(ctx)
-            noise = mx.nd.random.normal(0, 1, shape=(opt.batch_size, nz, 1, 1), ctx=ctx)
-
-            with autograd.record():
-                output = netD(data)
-                # reshape output from (opt.batch_size, 2, 1, 1) to (opt.batch_size, 2)
-                output = output.reshape((opt.batch_size, 2))
-                errD_real = loss(output, real_label)
-
-            metric.update([real_label, ], [output, ])
-
-            with autograd.record():
-                fake = netG(noise)
-                output = netD(fake.detach())
-                output = output.reshape((opt.batch_size, 2))
-                errD_fake = loss(output, fake_label)
-                errD = errD_real + errD_fake
-
-            errD.backward()
-            metric.update([fake_label,], [output,])
-
-            trainerD.step(opt.batch_size)
-
-            ############################
-            # (2) Update G network: maximize log(D(G(z)))
-            ###########################
-            with autograd.record():
-                output = netD(fake)
-                output = output.reshape((-1, 2))
-                errG = loss(output, real_label)
-
-            errG.backward()
-
-            trainerG.step(opt.batch_size)
-
-            name, acc = metric.get()
-            logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d'
-                         , mx.nd.mean(errD).asscalar(), mx.nd.mean(errG).asscalar(), acc, iter, epoch)
-            if iter % niter == 0:
-                visual('gout', fake.asnumpy(), name=os.path.join(outf, 'fake_img_iter_%d.png' % iter))
-                visual('data', data.asnumpy(), name=os.path.join(outf, 'real_img_iter_%d.png' % iter))
-                # record the metric data
-                loss_d.append(errD)
-                loss_g.append(errG)
-                if opt.inception_score:
-                    score, _ = get_inception_score(fake)
-                    inception_score.append(score)
-
-            iter = iter + 1
-            btic = time.time()
-
-        name, acc = metric.get()
-        metric.reset()
-        logging.info('\nbinary training acc at epoch %d: %s=%f', epoch, name, acc)
-        logging.info('time: %f', time.time() - tic)
-
-        # save check_point
-        if check_point:
-            netG.save_parameters(os.path.join(outf, 'generator_epoch_%d.params' %epoch))
-            netD.save_parameters(os.path.join(outf, 'discriminator_epoch_%d.params' % epoch))
-
-    # save parameter
-    netG.save_parameters(os.path.join(outf, 'generator.params'))
-    netD.save_parameters(os.path.join(outf, 'discriminator.params'))
-
-    # visualization the inception_score as a picture
-    if opt.inception_score:
-        ins_save(inception_score)
-
-
-if __name__ == '__main__':
-    if opt.inception_score:
-        print("Use inception_score to metric this DCgan model, the reusult is save as a picture "
-              "named \"inception_score.png\"!")
-    main()
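After training, the saved generator parameters can be reloaded to draw new samples. The following is a minimal sketch (not part of the removed script), assuming `get_netG` and `visual` from `dcgan.py` above are importable and that training wrote `generator.params` into the default `./results` folder:

```python
import os
import mxnet as mx
from dcgan import get_netG, visual  # assumes dcgan.py above is on the path

ctx = mx.cpu()
netG = get_netG()
# Load the parameters written by main() at the end of training (assumed location).
netG.load_parameters(os.path.join('./results', 'generator.params'), ctx=ctx)

# Sample latent vectors with the default latent size (nz = 100) and generate images.
noise = mx.nd.random.normal(0, 1, shape=(64, 100, 1, 1), ctx=ctx)
fake = netG(noise)
visual('samples', fake.asnumpy(), name='samples.png')
```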
diff --git a/example/gluon/dc_gan/inception_score.py b/example/gluon/dc_gan/inception_score.py
deleted file mode 100644
index e23513f..0000000
--- a/example/gluon/dc_gan/inception_score.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from mxnet.gluon.model_zoo import vision as models
-import mxnet as mx
-from mxnet import nd
-import numpy as np
-import math
-import sys
-
-import cv2
-
-
-inception_model = None
-
-
-def get_inception_score(images, splits=10):
-    """
-    Inception_score function.
-        The images will be divided into 'splits' parts, and calculate each inception_score separately,
-        then return the mean and std of inception_scores of these parts.
-    :param images: Images(num x c x w x h) that needs to calculate inception_score.
-    :param splits:
-    :return: mean and std of inception_score
-    """
-    assert (images.shape[1] == 3)
-
-    # load inception model
-    if inception_model is None:
-        _init_inception()
-
-    # resize images to adapt inception model(inceptionV3)
-    if images.shape[2] != 299:
-        images = resize(images, 299, 299)
-
-    preds = []
-    bs = 4
-    n_batches = int(math.ceil(float(images.shape[0])/float(bs)))
-
-    # to get the predictions/picture of inception model
-    for i in range(n_batches):
-        sys.stdout.write(".")
-        sys.stdout.flush()
-        inps = images[(i * bs):min((i + 1) * bs, len(images))]
-        # inps size. bs x 3 x 299 x 299
-        pred = nd.softmax(inception_model(inps))
-        # pred size. bs x 1000
-        preds.append(pred.asnumpy())
-
-    # list to array
-    preds = np.concatenate(preds, 0)
-    scores = []
-
-    # to calculate the inception_score each split.
-    for i in range(splits):
-        # extract per split image pred
-        part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]
-        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
-        kl = np.mean(np.sum(kl, 1))
-        scores.append(np.exp(kl))
-
-    return np.mean(scores), np.std(scores)
-
-
-def _init_inception():
-    global inception_model
-    inception_model = models.inception_v3(pretrained=True)
-    print("success import inception model, and the model is inception_v3!")
-
-
-def resize(images, w, h):
-    nums = images.shape[0]
-    res = nd.random.uniform(0, 255, (nums, 3, w, h))
-    for i in range(nums):
-        img = images[i, :, :, :]
-        img = mx.nd.transpose(img, (1, 2, 0))
-        # Use 'cv2.resize()' instead of 'mx.image.imresize()' because operator _cvimresize is not implemented for GPU.
-        # img = mx.image.imresize(img, w, h)
-        img = cv2.resize(img.asnumpy(), (w, h))
-        img = nd.array(img)
-        img = mx.nd.transpose(img, (2, 0, 1))
-        res[i, :, :, :] = img
-
-    return res
-
-
-if __name__ == '__main__':
-    if inception_model is None:
-        _init_inception()
-    # dummy data
-    images = nd.random.uniform(0, 255, (64, 3, 64, 64))
-    print(images.shape[0])
-    # resize(images,299,299)
-
-    score = get_inception_score(images)
-    print(score)
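The per-split computation above reduces to the exponential of the average KL divergence between the per-image prediction p(y|x) and the marginal p(y). A tiny NumPy illustration with made-up softmax outputs (purely hypothetical numbers):

```python
import numpy as np

# Fake "softmax outputs" for 4 images over 3 classes.
preds = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.6, 0.3, 0.1],
                  [0.2, 0.2, 0.6]])

p_y = preds.mean(axis=0, keepdims=True)                    # marginal p(y)
kl = (preds * (np.log(preds) - np.log(p_y))).sum(axis=1)   # KL(p(y|x) || p(y)) per image
print(np.exp(kl.mean()))                                   # inception score of this single split
```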
diff --git a/example/gluon/embedding_learning/README.md b/example/gluon/embedding_learning/README.md
deleted file mode 100644
index ee3a0ea..0000000
--- a/example/gluon/embedding_learning/README.md
+++ /dev/null
@@ -1,93 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Image Embedding Learning
-
-This example implements embedding learning based on a Margin-based Loss with distance weighted sampling [(Wu et al, 2017)](http://www.philkr.net/papers/2017-10-01-iccv/2017-10-01-iccv.pdf). The model obtains a validation Recall@1 of ~64% on the [Caltech-UCSD Birds-200-2011](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html) dataset.
-
-
-## Usage
-Download the data
-
-Note: the dataset is from [Caltech-UCSD Birds 200](http://www.vision.caltech.edu/visipedia/CUB-200.html).
-These datasets are copyright Caltech Computational Vision Group and licensed CC BY 4.0 Attribution.
-See [original dataset source](http://www.vision.caltech.edu/archive.html) for details
-```bash
-./get_cub200_data.sh
-```
-
-Example training run:
-```
-python3 train.py --data-path=data/CUB_200_2011 --gpus=0,1 --use-pretrained
-```
-
-<br>
-
-`python train.py --help` gives the following arguments:
-```
-optional arguments:
-  -h, --help            show this help message and exit
-  --data-path DATA_PATH
-                        path of data.
-  --embed-dim EMBED_DIM
-                        dimensionality of image embedding. default is 128.
-  --batch-size BATCH_SIZE
-                        training batch size per device (CPU/GPU). default is
-                        70.
-  --batch-k BATCH_K     number of images per class in a batch. default is 5.
-  --gpus GPUS           list of gpus to use, e.g. 0 or 0,2,5. empty means
-                        using cpu.
-  --epochs EPOCHS       number of training epochs. default is 20.
-  --optimizer OPTIMIZER
-                        optimizer. default is adam.
-  --lr LR               learning rate. default is 0.0001.
-  --lr-beta LR_BETA     learning rate for the beta in margin based loss.
-                        default is 0.1.
-  --margin MARGIN       margin for the margin based loss. default is 0.2.
-  --beta BETA           initial value for beta. default is 1.2.
-  --nu NU               regularization parameter for beta. default is 0.0.
-  --factor FACTOR       learning rate schedule factor. default is 0.5.
-  --steps STEPS         epochs to update learning rate. default is
-                        12,14,16,18.
-  --wd WD               weight decay rate. default is 0.0001.
-  --seed SEED           random seed to use. default=123.
-  --model MODEL         type of model to use. see vision_model for options.
-  --save-model-prefix SAVE_MODEL_PREFIX
-                        prefix of models to be saved.
-  --use-pretrained      enable using pretrained model from gluon.
-  --kvstore KVSTORE     kvstore to use for trainer.
-  --log-interval LOG_INTERVAL
-                        number of batches to wait before logging.
-```
-
-## Learned embeddings
-The following visualizes the learned embeddings with t-SNE.
-
-![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/embedding_learning/cub200_embedding.png)
-
-
-## Citation
-<b>Sampling Matters in Deep Embedding Learning</b> [<a href="https://arxiv.org/abs/1706.07567">paper</a>] [<a href="https://www.cs.utexas.edu/~cywu/projects/sampling_matters/">project</a>]  <br>
-  Chao-Yuan Wu, R. Manmatha, Alexander J. Smola and Philipp Kr&auml;henb&uuml;hl
-<pre>
-@inproceedings{wu2017sampling,
-  title={Sampling Matters in Deep Embedding Learning},
-  author={Wu, Chao-Yuan and Manmatha, R and Smola, Alexander J and Kr{\"a}henb{\"u}hl, Philipp},
-  booktitle={ICCV},
-  year={2017}
-}
-</pre>
diff --git a/example/gluon/embedding_learning/data.py b/example/gluon/embedding_learning/data.py
deleted file mode 100644
index e3b96d6..0000000
--- a/example/gluon/embedding_learning/data.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import random
-
-import numpy as np
-
-import mxnet as mx
-from mxnet import nd
-
-def transform(data, target_wd, target_ht, is_train, box):
-    """Crop and normnalize an image nd array."""
-    if box is not None:
-        x, y, w, h = box
-        data = data[y:min(y+h, data.shape[0]), x:min(x+w, data.shape[1])]
-
-    # Resize to target_wd * target_ht.
-    data = mx.image.imresize(data, target_wd, target_ht)
-
-    # Normalize in the same way as the pre-trained model.
-    data = data.astype(np.float32) / 255.0
-    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array([0.229, 0.224, 0.225])
-
-    if is_train:
-        if random.random() < 0.5:
-            data = nd.flip(data, axis=1)
-        data, _ = mx.image.random_crop(data, (224, 224))
-    else:
-        data, _ = mx.image.center_crop(data, (224, 224))
-
-    # Transpose from (target_wd, target_ht, 3)
-    # to (3, target_wd, target_ht).
-    data = nd.transpose(data, (2, 0, 1))
-
-    # If image is greyscale, repeat 3 times to get RGB image.
-    if data.shape[0] == 1:
-        data = nd.tile(data, (3, 1, 1))
-    return data.reshape((1,) + data.shape)
-
-
-class CUB200Iter(mx.io.DataIter):
-    """Iterator for the CUB200-2011 dataset.
-    Parameters
-    ----------
-    data_path : str,
-        The path to dataset directory.
-    batch_k : int,
-        Number of images per class in a batch.
-    batch_size : int,
-        Batch size.
-    data_shape : tuple,
-        Data shape. E.g. (3, 224, 224).
-    is_train : bool,
-        Training data or testing data. Training batches are randomly sampled.
-        Testing batches are loaded sequentially until reaching the end.
-    """
-    def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
-        super(CUB200Iter, self).__init__(batch_size)
-        self.data_shape = (batch_size,) + data_shape
-        self.batch_size = batch_size
-        self.provide_data = [('data', self.data_shape)]
-        self.batch_k = batch_k
-        self.is_train = is_train
-
-        self.train_image_files = [[] for _ in range(100)]
-        self.test_image_files = []
-        self.test_labels = []
-        self.boxes = {}
-        self.test_count = 0
-
-        with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
-             open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
-             open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
-            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
-                fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
-                label = int(line_label.strip().split()[-1]) - 1
-                box = [int(float(v)) for v in line_box.split()[-4:]]
-                self.boxes[fname] = box
-
-                # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
-                # we use the first 100 classes for training, and the remaining for testing.
-                if label < 100:
-                    self.train_image_files[label].append(fname)
-                else:
-                    self.test_labels.append(label)
-                    self.test_image_files.append(fname)
-
-        self.n_test = len(self.test_image_files)
-
-    def get_image(self, img, is_train):
-        """Load and transform an image."""
-        img_arr = mx.image.imread(img)
-        img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
-        return img_arr
-
-    def sample_train_batch(self):
-        """Sample a training batch (data and label)."""
-        batch = []
-        labels = []
-        num_groups = self.batch_size // self.batch_k
-
-        # For CUB200, we use the first 100 classes for training.
-        sampled_classes = np.random.choice(100, num_groups, replace=False)
-        for i in range(num_groups):
-            img_fnames = np.random.choice(self.train_image_files[sampled_classes[i]],
-                                          self.batch_k, replace=False)
-            batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
-            labels += [sampled_classes[i] for _ in range(self.batch_k)]
-
-        return nd.concatenate(batch, axis=0), labels
-
-    def get_test_batch(self):
-        """Sample a testing batch (data and label)."""
-
-        batch_size = self.batch_size
-        batch = [self.get_image(self.test_image_files[(self.test_count*batch_size + i)
-                                                      % len(self.test_image_files)],
-                                is_train=False) for i in range(batch_size)]
-        labels = [self.test_labels[(self.test_count*batch_size + i)
-                                   % len(self.test_image_files)] for i in range(batch_size)]
-        return nd.concatenate(batch, axis=0), labels
-
-    def reset(self):
-        """Reset an iterator."""
-        self.test_count = 0
-
-    def next(self):
-        """Return a batch."""
-        if self.is_train:
-            data, labels = self.sample_train_batch()
-        else:
-            if self.test_count * self.batch_size < len(self.test_image_files):
-                data, labels = self.get_test_batch()
-                self.test_count += 1
-            else:
-                self.test_count = 0
-                raise StopIteration
-        return mx.io.DataBatch(data=[data], label=[labels])
-
-def cub200_iterator(data_path, batch_k, batch_size, data_shape):
-    """Return training and testing iterator for the CUB200-2011 dataset."""
-    return (CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True),
-            CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False))
diff --git a/example/gluon/embedding_learning/get_cub200_data.sh b/example/gluon/embedding_learning/get_cub200_data.sh
deleted file mode 100755
index 4cf83e7..0000000
--- a/example/gluon/embedding_learning/get_cub200_data.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-EMB_DIR=$(cd `dirname $0`; pwd)
-DATA_DIR="${EMB_DIR}/data/"
-
-if [[ ! -d "${DATA_DIR}" ]]; then
-  echo "${DATA_DIR} doesn't exist, will create one.";
-  mkdir -p ${DATA_DIR}
-fi
-
-# the dataset is from Caltech-UCSD Birds 200
-# http://www.vision.caltech.edu/visipedia/CUB-200.html
-# These datasets are copyright Caltech Computational Vision Group and licensed CC BY 4.0 Attribution.
-# See http://www.vision.caltech.edu/archive.html for details
-wget -P ${DATA_DIR} http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz
-cd ${DATA_DIR}; tar -xf CUB_200_2011.tgz
diff --git a/example/gluon/embedding_learning/model.py b/example/gluon/embedding_learning/model.py
deleted file mode 100644
index f82240e..0000000
--- a/example/gluon/embedding_learning/model.py
+++ /dev/null
@@ -1,230 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-from mxnet import gluon
-from mxnet.gluon import nn, Block, HybridBlock
-import numpy as np
-
-class L2Normalization(HybridBlock):
-    r"""Applies L2 Normalization to input.
-
-    Parameters
-    ----------
-    mode : str
-        Mode of normalization.
-        See :func:`~mxnet.ndarray.L2Normalization` for available choices.
-
-    Inputs:
-        - **data**: input tensor with arbitrary shape.
-
-    Outputs:
-        - **out**: output tensor with the same shape as `data`.
-    """
-    def __init__(self, mode, **kwargs):
-        self._mode = mode
-        super(L2Normalization, self).__init__(**kwargs)
-
-    def hybrid_forward(self, F, x):
-        return F.L2Normalization(x, mode=self._mode, name='l2_norm')
-
-    def __repr__(self):
-        s = '{name}({_mode})'
-        return s.format(name=self.__class__.__name__,
-                        **self.__dict__)
-
-
-def get_distance(F, x):
-    """Helper function for margin-based loss. Return a distance matrix given a matrix."""
-    n = x.shape[0]
-
-    square = F.sum(x ** 2.0, axis=1, keepdims=True)
-    distance_square = square + square.transpose() - (2.0 * F.dot(x, x.transpose()))
-
-    # Adding identity to make sqrt work.
-    return F.sqrt(distance_square + F.array(np.identity(n)))
-
-class DistanceWeightedSampling(HybridBlock):
-    r"""Distance weighted sampling. See "sampling matters in deep embedding learning"
-    paper for details.
-
-    Parameters
-    ----------
-    batch_k : int
-        Number of images per class.
-
-    Inputs:
-        - **data**: input tensor with shape (batch_size, embed_dim).
-        Here we assume the consecutive batch_k examples are of the same class.
-        For example, if batch_k = 5, the first 5 examples belong to the same class,
-        6th-10th examples belong to another class, etc.
-
-    Outputs:
-        - a_indices: indices of anchors.
-        - x[a_indices]: sampled anchor embeddings.
-        - x[p_indices]: sampled positive embeddings.
-        - x[n_indices]: sampled negative embeddings.
-        - x: embeddings of the input batch.
-    """
-    def __init__(self, batch_k, cutoff=0.5, nonzero_loss_cutoff=1.4, **kwargs):
-        self.batch_k = batch_k
-        self.cutoff = cutoff
-
-        # We sample only from negatives that induce a non-zero loss.
-        # These are negatives with a distance < nonzero_loss_cutoff.
-        # With a margin-based loss, nonzero_loss_cutoff == margin + beta.
-        self.nonzero_loss_cutoff = nonzero_loss_cutoff
-        super(DistanceWeightedSampling, self).__init__(**kwargs)
-
-    def hybrid_forward(self, F, x):
-        k = self.batch_k
-        n, d = x.shape
-
-        distance = get_distance(F, x)
-        # Cut off to avoid high variance.
-        distance = F.maximum(distance, self.cutoff)
-
-        # Subtract max(log(distance)) for stability.
-        log_weights = ((2.0 - float(d)) * F.log(distance)
-                       - (float(d - 3) / 2) * F.log(1.0 - 0.25 * (distance ** 2.0)))
-        weights = F.exp(log_weights - F.max(log_weights))
-
-        # Sample only negative examples by setting weights of
-        # the same-class examples to 0.
-        mask = np.ones(weights.shape)
-        for i in range(0, n, k):
-            mask[i:i+k, i:i+k] = 0
-        mask_uniform_probs = mask * (1.0/(n-k))
-
-        weights = weights * F.array(mask) * (distance < self.nonzero_loss_cutoff)
-        weights_sum = F.sum(weights, axis=1, keepdims=True)
-        weights = weights / weights_sum
-
-        a_indices = []
-        p_indices = []
-        n_indices = []
-
-        np_weights = weights.asnumpy()
-        for i in range(n):
-            block_idx = i // k
-
-            if weights_sum[i] != 0:
-                n_indices += np.random.choice(n, k-1, p=np_weights[i]).tolist()
-            else:
-                # all samples are above the cutoff so we sample uniformly
-                n_indices += np.random.choice(n, k-1, p=mask_uniform_probs[i]).tolist()
-            for j in range(block_idx * k, (block_idx + 1) * k):
-                if j != i:
-                    a_indices.append(i)
-                    p_indices.append(j)
-
-        return a_indices, x[a_indices], x[p_indices], x[n_indices], x
-
-    def __repr__(self):
-        s = '{name}({batch_k})'
-        return s.format(name=self.__class__.__name__,
-                        **self.__dict__)
-
-
-class MarginNet(Block):
-    r"""Embedding network with distance weighted sampling.
-    It takes a base CNN and adds an embedding layer and a
-    sampling layer at the end.
-
-    Parameters
-    ----------
-    base_net : Block
-        Base network.
-    emb_dim : int
-        Dimensionality of the embedding.
-    batch_k : int
-        Number of images per class in a batch. Used in sampling.
-
-    Inputs:
-        - **data**: input tensor with shape (batch_size, channels, width, height).
-        Here we assume the consecutive batch_k images are of the same class.
-        For example, if batch_k = 5, the first 5 images belong to the same class,
-        6th-10th images belong to another class, etc.
-
-    Outputs:
-        - The output of DistanceWeightedSampling.
-    """
-    def __init__(self, base_net, emb_dim, batch_k, **kwargs):
-        super(MarginNet, self).__init__(**kwargs)
-        with self.name_scope():
-            self.base_net = base_net
-            self.dense = nn.Dense(emb_dim)
-            self.normalize = L2Normalization(mode='instance')
-            self.sampled = DistanceWeightedSampling(batch_k=batch_k)
-
-    def forward(self, x):
-        z = self.base_net(x)
-        z = self.dense(z)
-        z = self.normalize(z)
-        z = self.sampled(z)
-        return z
-
-
-class MarginLoss(gluon.loss.Loss):
-    r"""Margin based loss.
-
-    Parameters
-    ----------
-    margin : float
-        Margin between positive and negative pairs.
-    nu : float
-        Regularization parameter for beta.
-
-    Inputs:
-        - anchors: sampled anchor embeddings.
-        - positives: sampled positive embeddings.
-        - negatives: sampled negative embeddings.
-        - beta_in: class-specific betas.
-        - a_indices: indices of anchors. Used to get class-specific beta.
-
-    Outputs:
-        - Loss.
-    """
-    def __init__(self, margin=0.2, nu=0.0, weight=None, batch_axis=0, **kwargs):
-        super(MarginLoss, self).__init__(weight, batch_axis, **kwargs)
-        self._margin = margin
-        self._nu = nu
-
-    def hybrid_forward(self, F, anchors, positives, negatives, beta_in, a_indices=None):
-        if a_indices is not None:
-            # Jointly train class-specific beta.
-            beta = beta_in.data()[a_indices]
-            beta_reg_loss = F.sum(beta) * self._nu
-        else:
-            # Use a constant beta.
-            beta = beta_in
-            beta_reg_loss = 0.0
-
-        d_ap = F.sqrt(F.sum(F.square(positives - anchors), axis=1) + 1e-8)
-        d_an = F.sqrt(F.sum(F.square(negatives - anchors), axis=1) + 1e-8)
-
-        pos_loss = F.maximum(d_ap - beta + self._margin, 0.0)
-        neg_loss = F.maximum(beta - d_an + self._margin, 0.0)
-
-        pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
-        if pair_cnt == 0.0:
-            # When pos_loss and neg_loss are zero, the total loss is zero as well
-            loss = F.sum(pos_loss + neg_loss)
-        else:
-            # Normalize based on the number of pairs.
-            loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
-        return gluon.loss._apply_weighting(F, loss, self._weight, None)
diff --git a/example/gluon/embedding_learning/train.py b/example/gluon/embedding_learning/train.py
deleted file mode 100644
index b8a5bf2..0000000
--- a/example/gluon/embedding_learning/train.py
+++ /dev/null
@@ -1,255 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import division
-
-import argparse
-import logging
-import time
-
-import numpy as np
-from bottleneck import argpartition
-
-import mxnet as mx
-from data import cub200_iterator
-from mxnet import gluon
-from mxnet.gluon.model_zoo import vision as models
-from mxnet import autograd as ag, nd
-from model import MarginNet, MarginLoss
-
-logging.basicConfig(level=logging.INFO)
-
-# CLI
-parser = argparse.ArgumentParser(description='Train an image embedding model with a margin-based loss.')
-parser.add_argument('--data-path', type=str, default='data/CUB_200_2011',
-                    help='path of data.')
-parser.add_argument('--embed-dim', type=int, default=128,
-                    help='dimensionality of image embedding. default is 128.')
-parser.add_argument('--batch-size', type=int, default=70,
-                    help='training batch size per device (CPU/GPU). default is 70.')
-parser.add_argument('--batch-k', type=int, default=5,
-                    help='number of images per class in a batch. default is 5.')
-parser.add_argument('--gpus', type=str, default='',
-                    help='list of gpus to use, e.g. 0 or 0,2,5. empty means using cpu.')
-parser.add_argument('--epochs', type=int, default=20,
-                    help='number of training epochs. default is 20.')
-parser.add_argument('--optimizer', type=str, default='adam',
-                    help='optimizer. default is adam.')
-parser.add_argument('--lr', type=float, default=0.0001,
-                    help='learning rate. default is 0.0001.')
-parser.add_argument('--lr-beta', type=float, default=0.1,
-                    help='learning rate for the beta in margin based loss. default is 0.1.')
-parser.add_argument('--margin', type=float, default=0.2,
-                    help='margin for the margin based loss. default is 0.2.')
-parser.add_argument('--beta', type=float, default=1.2,
-                    help='initial value for beta. default is 1.2.')
-parser.add_argument('--nu', type=float, default=0.0,
-                    help='regularization parameter for beta. default is 0.0.')
-parser.add_argument('--factor', type=float, default=0.5,
-                    help='learning rate schedule factor. default is 0.5.')
-parser.add_argument('--steps', type=str, default='12,14,16,18',
-                    help='epochs to update learning rate. default is 12,14,16,18.')
-parser.add_argument('--wd', type=float, default=0.0001,
-                    help='weight decay rate. default is 0.0001.')
-parser.add_argument('--seed', type=int, default=123,
-                    help='random seed to use. default=123.')
-parser.add_argument('--model', type=str, default='resnet50_v2',
-                    help='type of model to use. see vision_model for options.')
-parser.add_argument('--save-model-prefix', type=str, default='margin_loss_model',
-                    help='prefix of models to be saved.')
-parser.add_argument('--use-pretrained', action='store_true',
-                    help='enable using pretrained model from gluon.')
-parser.add_argument('--kvstore', type=str, default='device',
-                    help='kvstore to use for trainer.')
-parser.add_argument('--log-interval', type=int, default=20,
-                    help='number of batches to wait before logging.')
-opt = parser.parse_args()
-
-logging.info(opt)
-
-# Settings.
-mx.random.seed(opt.seed)
-np.random.seed(opt.seed)
-
-batch_size = opt.batch_size
-
-gpus = [] if opt.gpus is None or opt.gpus == '' else [
-    int(gpu) for gpu in opt.gpus.split(',')]
-num_gpus = len(gpus)
-
-batch_size *= max(1, num_gpus)
-context = [mx.gpu(i) for i in gpus] if num_gpus > 0 else [mx.cpu()]
-steps = [int(step) for step in opt.steps.split(',')]
-
-# Construct model.
-kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
-net = models.get_model(opt.model, **kwargs)
-
-if opt.use_pretrained:
-    # Use a smaller learning rate for pre-trained convolutional layers.
-    for v in net.collect_params().values():
-        if 'conv' in v.name:
-            setattr(v, 'lr_mult', 0.01)
-
-net.hybridize()
-net = MarginNet(net.features, opt.embed_dim, opt.batch_k)
-beta = mx.gluon.Parameter('beta', shape=(100,))
-
-# Get iterators.
-train_data, val_data = cub200_iterator(opt.data_path, opt.batch_k, batch_size, (3, 224, 224))
-
-
-def get_distance_matrix(x):
-    """Get distance matrix given a matrix. Used in testing."""
-    square = nd.sum(x ** 2.0, axis=1, keepdims=True)
-    distance_square = square + square.transpose() - (2.0 * nd.dot(x, x.transpose()))
-    return nd.sqrt(distance_square)
-
-
-def evaluate_emb(emb, labels):
-    """Evaluate embeddings based on Recall@k."""
-    d_mat = get_distance_matrix(emb)
-    d_mat = d_mat.asnumpy()
-    labels = labels.asnumpy()
-
-    names = []
-    accs = []
-    for k in [1, 2, 4, 8, 16]:
-        names.append('Recall@%d' % k)
-        correct, cnt = 0.0, 0.0
-        for i in range(emb.shape[0]):
-            d_mat[i, i] = 1e10
-            nns = argpartition(d_mat[i], k)[:k]
-            if any(labels[i] == labels[nn] for nn in nns):
-                correct += 1
-            cnt += 1
-        accs.append(correct/cnt)
-    return names, accs
-
-
-def test(ctx):
-    """Test a model."""
-    val_data.reset()
-    outputs = []
-    labels = []
-    for batch in val_data:
-        data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
-        label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
-        for x in data:
-            outputs.append(net(x)[-1])
-        labels += label
-
-    outputs = nd.concatenate(outputs, axis=0)[:val_data.n_test]
-    labels = nd.concatenate(labels, axis=0)[:val_data.n_test]
-    return evaluate_emb(outputs, labels)
-
-
-def get_lr(lr, epoch, steps, factor):
-    """Get learning rate based on schedule."""
-    for s in steps:
-        if epoch >= s:
-            lr *= factor
-    return lr
-
-
-def train(epochs, ctx):
-    """Training function."""
-    if isinstance(ctx, mx.Context):
-        ctx = [ctx]
-    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
-
-    opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
-    if opt.optimizer == 'sgd':
-        opt_options['momentum'] = 0.9
-    if opt.optimizer == 'adam':
-        opt_options['epsilon'] = 1e-7
-    trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
-                            opt_options,
-                            kvstore=opt.kvstore)
-    if opt.lr_beta > 0.0:
-        # Jointly train class-specific beta.
-        # See "sampling matters in deep embedding learning" paper for details.
-        beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
-        trainer_beta = gluon.Trainer([beta], 'sgd',
-                                     {'learning_rate': opt.lr_beta, 'momentum': 0.9},
-                                     kvstore=opt.kvstore)
-
-    loss = MarginLoss(margin=opt.margin, nu=opt.nu)
-
-    best_val = 0.0
-    for epoch in range(epochs):
-        tic = time.time()
-        prev_loss, cumulative_loss = 0.0, 0.0
-
-        # Learning rate schedule.
-        trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
-        logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
-        if opt.lr_beta > 0.0:
-            trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
-            logging.info('Epoch %d beta learning rate=%f', epoch, trainer_beta.learning_rate)
-
-        # Inner training loop.
-        for i in range(200):
-            batch = train_data.next()
-            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
-            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
-
-            Ls = []
-            with ag.record():
-                for x, y in zip(data, label):
-                    a_indices, anchors, positives, negatives, _ = net(x)
-
-                    if opt.lr_beta > 0.0:
-                        L = loss(anchors, positives, negatives, beta, y[a_indices])
-                    else:
-                        L = loss(anchors, positives, negatives, opt.beta, None)
-
-                    # Store the loss and do backward after we have done forward
-                    # on all GPUs for better speed on multiple GPUs.
-                    Ls.append(L)
-                    cumulative_loss += nd.mean(L).asscalar()
-
-                for L in Ls:
-                    L.backward()
-
-            # Update.
-            trainer.step(batch.data[0].shape[0])
-            if opt.lr_beta > 0.0:
-                trainer_beta.step(batch.data[0].shape[0])
-
-            if (i+1) % opt.log_interval == 0:
-                logging.info('[Epoch %d, Iter %d] training loss=%f' % (
-                    epoch, i+1, cumulative_loss - prev_loss))
-                prev_loss = cumulative_loss
-
-        logging.info('[Epoch %d] training loss=%f'%(epoch, cumulative_loss))
-        logging.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
-
-        names, val_accs = test(ctx)
-        for name, val_acc in zip(names, val_accs):
-            logging.info('[Epoch %d] validation: %s=%f'%(epoch, name, val_acc))
-
-        if val_accs[0] > best_val:
-            best_val = val_accs[0]
-            logging.info('Saving %s.' % opt.save_model_prefix)
-            net.save_parameters('%s.params' % opt.save_model_prefix)
-    return best_val
-
-
-if __name__ == '__main__':
-    best_val_recall = train(opt.epochs, context)
-    print('Best validation Recall@1: %.2f.' % best_val_recall)
diff --git a/example/gluon/house_prices/kaggle_k_fold_cross_validation.py b/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
index 420e6fc..52ddf0e 100644
--- a/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
+++ b/example/gluon/house_prices/kaggle_k_fold_cross_validation.py
@@ -26,11 +26,11 @@
 # The link to the problem on Kaggle:
 # https://www.kaggle.com/c/house-prices-advanced-regression-techniques
 
-import numpy as np
+import numpy as onp
 import pandas as pd
 from mxnet import autograd
 from mxnet import gluon
-from mxnet import ndarray as nd
+from mxnet import np
 
 # After logging in www.kaggle.com, the training and testing data sets can be downloaded at:
 # https://www.kaggle.com/c/house-prices-advanced-regression-techniques/download/train.csv
@@ -56,26 +56,25 @@ X_train = all_X[:num_train].as_matrix()
 X_test = all_X[num_train:].as_matrix()
 y_train = train.SalePrice.as_matrix()
 
-X_train = nd.array(X_train)
-y_train = nd.array(y_train)
+X_train = np.array(X_train)
+y_train = np.array(y_train)
 y_train.reshape((num_train, 1))
 
-X_test = nd.array(X_test)
+X_test = np.array(X_test)
 square_loss = gluon.loss.L2Loss()
 
 def get_rmse_log(net, X_train, y_train):
     """Gets root mse between the logarithms of the prediction and the truth."""
     num_train = X_train.shape[0]
-    clipped_preds = nd.clip(net(X_train), 1, float('inf'))
-    return np.sqrt(2 * nd.sum(square_loss(
-        nd.log(clipped_preds), nd.log(y_train))).asscalar() / num_train)
+    clipped_preds = np.clip(net(X_train), 1, float('inf'))
+    return np.sqrt(2 * np.sum(square_loss(
+        np.log(clipped_preds), np.log(y_train))).item() / num_train)
 
 def get_net():
     """Gets a neural network. Better results are obtained with modifications."""
     net = gluon.nn.Sequential()
-    with net.name_scope():
-        net.add(gluon.nn.Dense(50, activation="relu"))
-        net.add(gluon.nn.Dense(1))
+    net.add(gluon.nn.Dense(50, activation="relu"))
+    net.add(gluon.nn.Dense(1))
     net.initialize()
     return net
 
@@ -123,8 +122,8 @@ def k_fold_cross_valid(k, epochs, verbose_epoch, X_train, y_train,
                     y_val_train = y_cur_fold
                     val_train_defined = True
                 else:
-                    X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0)
-                    y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0)
+                    X_val_train = np.concatenate([X_val_train, X_cur_fold], axis=0)
+                    y_val_train = np.concatenate([y_val_train, y_cur_fold], axis=0)
         net = get_net()
         train_loss = train(net, X_val_train, y_val_train, epochs, verbose_epoch,
                            learning_rate, weight_decay, batch_size)
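Because gluon's `L2Loss` computes 0.5 * (pred - label)^2, multiplying the summed loss by 2 and dividing by the number of samples recovers the ordinary root-mean-squared-log-error. A short NumPy check with made-up prices illustrates the equivalence:

```python
import numpy as np

preds = np.array([120000.0, 95000.0, 310000.0])   # hypothetical predicted sale prices
truth = np.array([100000.0, 90000.0, 300000.0])   # hypothetical true sale prices

# Equivalent to get_rmse_log(): sqrt(2 * sum(0.5 * (log p - log y)^2) / n)
rmsle = np.sqrt(np.mean((np.log(preds) - np.log(truth)) ** 2))
print(rmsle)
```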
diff --git a/example/gluon/lipnet/.gitignore b/example/gluon/lipnet/.gitignore
deleted file mode 100644
index 9a6ee99..0000000
--- a/example/gluon/lipnet/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-__pycache__/
-utils/*.dat
-
diff --git a/example/gluon/lipnet/BeamSearch.py b/example/gluon/lipnet/BeamSearch.py
deleted file mode 100644
index 1b41bc0..0000000
--- a/example/gluon/lipnet/BeamSearch.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module : this module to decode using beam search
-https://github.com/ThomasDelteil/HandwrittenTextRecognition_MXNet/blob/master/utils/CTCDecoder/BeamSearch.py 
-"""
-
-from __future__ import division
-from __future__ import print_function
-import numpy as np
-
-class BeamEntry:
-    """
-    information about one single beam at specific time-step
-    """
-    def __init__(self):
-        self.prTotal = 0 # blank and non-blank
-        self.prNonBlank = 0 # non-blank
-        self.prBlank = 0 # blank
-        self.prText = 1 # LM score
-        self.lmApplied = False # flag if LM was already applied to this beam
-        self.labeling = () # beam-labeling
-
-class BeamState:
-    """
-    information about the beams at specific time-step
-    """
-    def __init__(self):
-        self.entries = {}
-        
-    def norm(self):
-        """
-        length-normalise LM score
-        """
-        for (k, _) in self.entries.items():
-            labelingLen = len(self.entries[k].labeling)
-            self.entries[k].prText = self.entries[k].prText ** (1.0 / (labelingLen if labelingLen else 1.0))
-
-    def sort(self):
-        """
-        return beam-labelings, sorted by probability
-        """
-        beams = [v for (_, v) in self.entries.items()]
-        sortedBeams = sorted(beams, reverse=True, key=lambda x: x.prTotal*x.prText)
-        return [x.labeling for x in sortedBeams]
-
-def applyLM(parentBeam, childBeam, classes, lm):
-    """
-    calculate LM score of child beam by taking score from parent beam and bigram probability of last two chars
-    """
-    if lm and not childBeam.lmApplied:
-        c1 = classes[parentBeam.labeling[-1] if parentBeam.labeling else classes.index(' ')] # first char
-        c2 = classes[childBeam.labeling[-1]] # second char
-        lmFactor = 0.01 # influence of language model
-        bigramProb = lm.getCharBigram(c1, c2) ** lmFactor # probability of seeing first and second char next to each other
-        childBeam.prText = parentBeam.prText * bigramProb # probability of char sequence
-        childBeam.lmApplied = True # only apply LM once per beam entry
-
-def addBeam(beamState, labeling):
-    """
-    add beam if it does not yet exist
-    """
-    if labeling not in beamState.entries:
-        beamState.entries[labeling] = BeamEntry()
-
-def ctcBeamSearch(mat, classes, lm, k, beamWidth):
-    """
-    beam search as described by the paper of Hwang et al. and the paper of Graves et al.
-    """
-
-    blankIdx = len(classes)
-    maxT, maxC = mat.shape
-
-    # initialise beam state
-    last = BeamState()
-    labeling = ()
-    last.entries[labeling] = BeamEntry()
-    last.entries[labeling].prBlank = 1
-    last.entries[labeling].prTotal = 1
-
-    # go over all time-steps
-    for t in range(maxT):
-        curr = BeamState()
-
-        # get beam-labelings of best beams
-        bestLabelings = last.sort()[0:beamWidth]
-
-        # go over best beams
-        for labeling in bestLabelings:
-
-            # probability of paths ending with a non-blank
-            prNonBlank = 0
-            # in case of non-empty beam
-            if labeling:
-                # probability of paths with repeated last char at the end
-                try:
-                    prNonBlank = last.entries[labeling].prNonBlank * mat[t, labeling[-1]]
-                except FloatingPointError:
-                    prNonBlank = 0
-
-            # probability of paths ending with a blank
-            prBlank = (last.entries[labeling].prTotal) * mat[t, blankIdx]
-
-            # add beam at current time-step if needed
-            addBeam(curr, labeling)
-
-            # fill in data
-            curr.entries[labeling].labeling = labeling
-            curr.entries[labeling].prNonBlank += prNonBlank
-            curr.entries[labeling].prBlank += prBlank
-            curr.entries[labeling].prTotal += prBlank + prNonBlank
-            curr.entries[labeling].prText = last.entries[labeling].prText # beam-labeling not changed, therefore the LM score is unchanged from the previous time-step
-            curr.entries[labeling].lmApplied = True # LM already applied at previous time-step for this beam-labeling
-
-            # extend current beam-labeling
-            for c in range(maxC - 1):
-                # add new char to current beam-labeling
-                newLabeling = labeling + (c,)
-
-                # if new labeling contains duplicate char at the end, only consider paths ending with a blank
-                if labeling and labeling[-1] == c:
-                    prNonBlank = mat[t, c] * last.entries[labeling].prBlank
-                else:
-                    prNonBlank = mat[t, c] * last.entries[labeling].prTotal
-
-                # add beam at current time-step if needed
-                addBeam(curr, newLabeling)
-
-                # fill in data
-                curr.entries[newLabeling].labeling = newLabeling
-                curr.entries[newLabeling].prNonBlank += prNonBlank
-                curr.entries[newLabeling].prTotal += prNonBlank
-
-                # apply LM
-                applyLM(curr.entries[labeling], curr.entries[newLabeling], classes, lm)
-
-        # set new beam state
-        last = curr
-
-    # normalise LM scores according to beam-labeling-length
-    last.norm()
-
-    # sort by probability
-    bestLabelings = last.sort()[:k] # get the k most probable labelings
-
-    output = []
-    for bestLabeling in bestLabelings:
-        # map labels to chars
-        res = ''
-        for l in bestLabeling:
-            res += classes[l]
-        output.append(res)
-    return output
\ No newline at end of file
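(For reference, the decoder removed above can be exercised on a toy posterior matrix; this is a minimal sketch of the same call made by the removed tests/test_beamsearch.py further down in this diff, with the CTC blank as the last column.)

```
import numpy as np
from BeamSearch import ctcBeamSearch

# Two time-steps over the classes 'a' and 'b'; the last column is the CTC blank.
classes = 'ab'
mat = np.array([[0.4, 0.0, 0.6],
                [0.4, 0.0, 0.6]])

# Returns the k most probable labelings decoded to strings; 'a' is expected first.
print(ctcBeamSearch(mat, classes, None, k=2, beamWidth=3))
```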
diff --git a/example/gluon/lipnet/README.md b/example/gluon/lipnet/README.md
deleted file mode 100644
index 89c27a1..0000000
--- a/example/gluon/lipnet/README.md
+++ /dev/null
@@ -1,254 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
---->
-
-# LipNet: End-to-End Sentence-level Lipreading
-
----
-
-This is a Gluon implementation of [LipNet: End-to-End Sentence-level Lipreading](https://arxiv.org/abs/1611.01599)
-
-![net_structure](asset/network_structure.png)
-
-![sample output](https://user-images.githubusercontent.com/11376047/52533982-d7227680-2d7e-11e9-9f18-c15b952faf0e.png)
-
-## Requirements
-- Python 3.6.4
-- MXNet 1.3.0
-- Required disk space: 35 GB
-```
-pip install -r requirements.txt
-```
-
----
-
-## The Data
-- The GRID audiovisual sentence corpus (http://spandh.dcs.shef.ac.uk/gridcorpus/)
-  - GRID is a large multi-talker audiovisual sentence corpus to support joint computational-behavioral studies in speech perception. In brief, the corpus consists of high-quality audio and video (facial) recordings of 1000 sentences spoken by each of 34 talkers (18 male, 16 female). Sentences are of the form "put red at G9 now". The corpus, together with transcriptions, is freely available for research use.
-- Video: normal quality (about 480 MB per talker)
-  - Each video contains one sentence consisting of 6 words.
-- Align: word alignments (about 190 KB per talker)
-  - Each align file lists the 6 words with their start and end times. This example only needs the whole sentence, since training uses CTC loss.
-
----
-
-## Pretrained model
-You can train the model yourself by following the sections below, test inference with a pretrained model, or resume training from a model checkpoint. To work with the provided pretrained model, first download it, then run one of the provided Python scripts for inference (infer.py) or training (main.py).
-
-* Download the [pretrained model](https://github.com/soeque1/temp_files/files/2848870/epoches_81_loss_15.7157.zip)
-* Try inference with the following:
-
-```
-python infer.py --model_path='checkpoint/epoches_81_loss_15.7157'
-```
-
-* Resume training with the following:
-
-```
-python main.py --model_path='checkpoint/epoches_81_loss_15.7157'
-```
-
-## Prepare the Data
-
-You can prepare the data yourself, or you can download preprocessed data.
-
-### Option 1 - Download the preprocessed data
-
-There are two download routes provided for the preprocessed data.
-
-#### Download and untar the data
-To download the archived files directly, download the following files and extract them into a folder called `data` in the root of this example folder. You should have the following structure:
-```
-/lipnet/data/align
-/lipnet/data/datasets
-```
-
-* [align files](https://mxnet-public.s3.amazonaws.com/lipnet/data-archives/align.tgz)
-* [datasets files](https://mxnet-public.s3.amazonaws.com/lipnet/data-archives/datasets.tgz)
-
-#### Use AWS CLI to sync the data
-To get the folders and files already extracted, you can use the following AWS CLI command, which recreates the folder structure for you. Run it from `/lipnet/`:
-
-```
- aws s3 sync s3://mxnet-public/lipnet/data .
-```
-
-### Option 2 (part 1)- Download the raw dataset
-- Outputs
-  - The Total Movies(mp4): 16GB
-  - The Total Aligns(text): 134MB
-- Arguments
-  - src_path : Path for videos (default='./data/mp4s/')
-  - align_path : Path for aligns (default='./data/')
-  - n_process : num of process (default=1)
-
-```
-cd ./utils && python download_data.py --n_process=$(nproc)
-```
-
-### Option 2 (part 2) - Preprocess the raw dataset: extract the mouth images from each video and save them
-
-* Uses face landmark detection (http://dlib.net/)
-
-#### Preprocess (preprocess_data.py)
-*  If the landmark model is not present, it is downloaded automatically.
-*  Using face landmark detection, it extracts the mouth region from each video.
-
-- Example:
-  - video: ./data/mp4s/s2/bbbf7p.mpg
-  - align (target): ./data/align/s2/bbbf7p.align
-    : 'sil bin blue by f seven please sil'
-
-
-- Video to images (75 frames)
-
-Frame 0            |  Frame 1 | ... | Frame 74 |
-:-------------------------:|:-------------------------:|:-------------------------:|:-------------------------:
-![](asset/s2_bbbf7p_000.png)  |  ![](asset/s2_bbbf7p_001.png) |  ...  |  ![](asset/s2_bbbf7p_074.png)
-
-  - Extract the mouth from images
-
-Frame 0            |  Frame 1 | ... | Frame 74 |
-:-------------------------:|:-------------------------:|:-------------------------:|:-------------------------:
-![](asset/mouth_000.png)  |  ![](asset/mouth_001.png) |  ...  |  ![](asset/mouth_074.png)
-
-* Save the resulting images into `tgt_path`.
-
-----
-
-#### How to run the preprocess script
-
-- Arguments
-  - src_path : Path for videos (default='./data/mp4s/')
-  - tgt_path : Path for preprocessed images (default='./data/datasets/')
-  - n_process : num of process (default=1)
-
-- Outputs
-  - The Total Images(png): 19GB
-- Elapsed time
-  - About 54 hours using 1 process
-  - Using multiple processes reduces this time roughly in proportion to the number of processes.
-    - e.g. about 9 hours using 6 processes
-
-You can run the preprocessing with just one processor, but this will take a long time (>48 hours). To use all of the available processors, use the following command:
-
-```
-cd ./utils && python preprocess_data.py --n_process=$(nproc)
-```
-
-#### Output: Data structure of the preprocessed data
-
-```
-The training data folder should look like :
-<train_data_root>
-                |--datasets
-                        |--s1
-                           |--bbir7s
-                               |--mouth_000.png
-                               |--mouth_001.png
-                                   ...
-                           |--bgaa8p
-                               |--mouth_000.png
-                               |--mouth_001.png
-                                  ...
-                        |--s2
-                            ...
-                 |--align
-                         |--bw1d8a.align
-                         |--bggzzs.align
-                             ...
-
-```
-
----
-
-## Training
-After you have acquired the preprocessed data, you are ready to train the LipNet model.
-
-- According to [LipNet: End-to-End Sentence-level Lipreading](https://arxiv.org/abs/1611.01599), four (S1, S2, S20, S22) of the 34 subjects are used for evaluation.
- The other subjects are used for training.
-
-- When training on multiple GPUs, it is recommended to scale the batch size by the number of GPUs.
-
-  - e.g. a batch size of 128 on 1 GPU becomes 256 on 2 GPUs
-
-
-- arguments
-  - batch_size : Define batch size (default=64)
-  - epochs : Define total epochs (default=100)
-  - image_path : Path for lip image files (default='./data/datasets/')
-  - align_path : Path for align files (default='./data/align/')
-  - dr_rate : Dropout rate (default=0.5)
-  - num_gpus : Num of gpus (if num_gpus is 0, then use cpu) (default=1)
-  - num_workers : Num of workers when generating data (default=0)
-  - model_path : Path of pretrained model (default=None)
-
-```
-python main.py
-```
-
----
-
-## Test Environment
-- 72 CPU cores
-- 1 GPU (NVIDIA Tesla V100 SXM2 32 GB)
-- 128 Batch Size
-
-  -  It takes over 24 hours (60 epochs) to get some good results.
-
----
-
-## Inference
-
-- arguments
-  - batch_size : Define batch size (default=64)
-  - image_path : Path for lip image files (default='./data/datasets/')
-  - align_path : Path for align files (default='./data/align/')
-  - num_gpus : Num of gpus (if num_gpus is 0, then use cpu) (default=1)
-  - num_workers : Num of workers when generating data (default=0)
-  - data_type : 'train' or 'valid' (default='valid')
-  - model_path : Path of pretrained model (default=None)
-
-```
-python infer.py --model_path=$(model_path)
-```
-
-
-```
-[Target]
-['lay green with a zero again',
- 'bin blue with r nine please',
- 'set blue with e five again',
- 'bin green by t seven soon',
- 'lay red at d five now',
- 'bin green in x eight now',
- 'bin blue with e one now',
- 'lay red at j nine now']
- ```
-
- ```
-[Pred]
-['lay green with s zero again',
- 'bin blue with r nine please',
- 'set blue with e five again',
- 'bin green by t seven soon',
- 'lay red at c five now',
- 'bin green in x eight now',
- 'bin blue with m one now',
- 'lay red at j nine now']
- ```
diff --git a/example/gluon/lipnet/asset/mouth_000.png b/example/gluon/lipnet/asset/mouth_000.png
deleted file mode 100644
index b318e56..0000000
Binary files a/example/gluon/lipnet/asset/mouth_000.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/mouth_001.png b/example/gluon/lipnet/asset/mouth_001.png
deleted file mode 100644
index 60bd04a..0000000
Binary files a/example/gluon/lipnet/asset/mouth_001.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/mouth_074.png b/example/gluon/lipnet/asset/mouth_074.png
deleted file mode 100644
index e5e0d78..0000000
Binary files a/example/gluon/lipnet/asset/mouth_074.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/network_structure.png b/example/gluon/lipnet/asset/network_structure.png
deleted file mode 100644
index eeec2cb..0000000
Binary files a/example/gluon/lipnet/asset/network_structure.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/s2_bbbf7p_000.png b/example/gluon/lipnet/asset/s2_bbbf7p_000.png
deleted file mode 100644
index 6495d2f..0000000
Binary files a/example/gluon/lipnet/asset/s2_bbbf7p_000.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/s2_bbbf7p_001.png b/example/gluon/lipnet/asset/s2_bbbf7p_001.png
deleted file mode 100644
index 2a7e269..0000000
Binary files a/example/gluon/lipnet/asset/s2_bbbf7p_001.png and /dev/null differ
diff --git a/example/gluon/lipnet/asset/s2_bbbf7p_074.png b/example/gluon/lipnet/asset/s2_bbbf7p_074.png
deleted file mode 100644
index eabd392..0000000
Binary files a/example/gluon/lipnet/asset/s2_bbbf7p_074.png and /dev/null differ
diff --git a/example/gluon/lipnet/checkpoint/__init__.py b/example/gluon/lipnet/checkpoint/__init__.py
deleted file mode 100644
index 13a8339..0000000
--- a/example/gluon/lipnet/checkpoint/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/example/gluon/lipnet/data_loader.py b/example/gluon/lipnet/data_loader.py
deleted file mode 100644
index e3cc24b..0000000
--- a/example/gluon/lipnet/data_loader.py
+++ /dev/null
@@ -1,94 +0,0 @@
-"""
-Description : Set DataSet module for lip images
-"""
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import glob
-from mxnet import nd
-import mxnet.gluon.data.dataset as dataset
-from mxnet.gluon.data.vision.datasets import image
-from utils.align import Align
-
-# pylint: disable=too-many-instance-attributes, too-many-arguments
-class LipsDataset(dataset.Dataset):
-    """
-    Description : DataSet class for lip images
-    """
-    def __init__(self, root, align_root, flag=1,
-                 mode='train', transform=None, seq_len=75):
-        assert mode in ['train', 'valid']
-        self._root = os.path.expanduser(root)
-        self._align_root = align_root
-        self._flag = flag
-        self._transform = transform
-        self._exts = ['.jpg', '.jpeg', '.png']
-        self._seq_len = seq_len
-        self._mode = mode
-        self._list_images(self._root)
-
-    def _list_images(self, root):
-        """
-        Description : generate list for lip images
-        """
-        self.labels = []
-        self.items = []
-
-        valid_unseen_sub_idx = [1, 2, 20, 22]
-        skip_sub_idx = [21]
-
-        if self._mode == 'train':
-            sub_idx = ['s' + str(i) for i in range(1, 35) \
-                             if i not in valid_unseen_sub_idx + skip_sub_idx]
-        elif self._mode == 'valid':
-            sub_idx = ['s' + str(i) for i in valid_unseen_sub_idx]
-
-        folder_path = []
-        for i in sub_idx:
-            folder_path.extend(glob.glob(os.path.join(root, i, "*")))
-
-        for folder in folder_path:
-            filename = glob.glob(os.path.join(folder, "*"))
-            if len(filename) != self._seq_len:
-                continue
-            filename.sort()
-            label = os.path.split(folder)[-1]
-            self.items.append((filename, label))
-
-    def align_generation(self, file_nm, padding=75):
-        """
-        Description : Align to lip position
-        """
-        align = Align(self._align_root + '/' + file_nm + '.align')
-        return nd.array(align.sentence(padding))
-
-    def __getitem__(self, idx):
-        img = list()
-        for image_name in self.items[idx][0]:
-            tmp_img = image.imread(image_name, self._flag)
-            if self._transform is not None:
-                tmp_img = self._transform(tmp_img)
-            img.append(tmp_img)
-        img = nd.stack(*img)
-        img = nd.transpose(img, (1, 0, 2, 3))
-        label = self.align_generation(self.items[idx][1],
-                                      padding=self._seq_len)
-        return img, label
-
-    def __len__(self):
-        return len(self.items)
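(For context, the removed trainer.py below consumed this dataset roughly as sketched here; the paths and normalization constants are the defaults used elsewhere in this example.)

```
import mxnet as mx
from mxnet.gluon.data.vision import transforms
from data_loader import LipsDataset

# Same transform as trainer.py: to CHW float tensor, then per-channel normalization.
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.7136, 0.4906, 0.3283), (0.1138, 0.1078, 0.0917)),
])

dataset = LipsDataset('./data/datasets/', './data/align/',
                      mode='train', transform=input_transform, seq_len=75)
loader = mx.gluon.data.DataLoader(dataset, batch_size=64, shuffle=True)

for images, labels in loader:
    # images: (batch, channels, seq_len, height, width); labels: padded char indices
    print(images.shape, labels.shape)
    break
```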
diff --git a/example/gluon/lipnet/infer.py b/example/gluon/lipnet/infer.py
deleted file mode 100644
index 746df9a..0000000
--- a/example/gluon/lipnet/infer.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Description : main module to run the lipnet inference code
-"""
-
-
-import argparse
-from trainer import Train
-
-def main():
-    """
-    Description : run lipnet training code using argument info
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--batch_size', type=int, default=64)
-    parser.add_argument('--image_path', type=str, default='./data/datasets/')
-    parser.add_argument('--align_path', type=str, default='./data/align/')
-    parser.add_argument('--num_gpus', type=int, default=1)
-    parser.add_argument('--num_workers', type=int, default=0)
-    parser.add_argument('--data_type', type=str, default='valid')
-    parser.add_argument('--model_path', type=str, default=None)
-    config = parser.parse_args()
-    trainer = Train(config)
-    trainer.build_model(path=config.model_path)
-    trainer.load_dataloader()
-
-    if config.data_type == 'train':
-        data_loader = trainer.train_dataloader
-    elif config.data_type == 'valid':
-        data_loader = trainer.valid_dataloader
-
-    trainer.infer_batch(data_loader)
-
-if __name__ == "__main__":
-    main()
-    
\ No newline at end of file
diff --git a/example/gluon/lipnet/main.py b/example/gluon/lipnet/main.py
deleted file mode 100644
index 8e5e756..0000000
--- a/example/gluon/lipnet/main.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Description : main module to run the lipnet training code
-"""
-
-
-import argparse
-from trainer import Train
-
-def main():
-    """
-    Description : run lipnet training code using argument info
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--batch_size', type=int, default=64)
-    parser.add_argument('--epochs', type=int, default=100)
-    parser.add_argument('--image_path', type=str, default='./data/datasets/')
-    parser.add_argument('--align_path', type=str, default='./data/align/')
-    parser.add_argument('--dr_rate', type=float, default=0.5)
-    parser.add_argument('--num_gpus', type=int, default=1)
-    parser.add_argument('--num_workers', type=int, default=0)
-    parser.add_argument('--model_path', type=str, default=None)
-    config = parser.parse_args()
-    trainer = Train(config)
-    trainer.build_model(dr_rate=config.dr_rate, path=config.model_path)
-    trainer.load_dataloader()
-    trainer.run(epochs=config.epochs)
-
-if __name__ == "__main__":
-    main()
-    
\ No newline at end of file
diff --git a/example/gluon/lipnet/models/__init__.py b/example/gluon/lipnet/models/__init__.py
deleted file mode 100644
index 26fa2ce..0000000
--- a/example/gluon/lipnet/models/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-
-#   http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/example/gluon/lipnet/models/network.py b/example/gluon/lipnet/models/network.py
deleted file mode 100644
index b8f005a..0000000
--- a/example/gluon/lipnet/models/network.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Description : LipNet module using gluon
-"""
-
-from mxnet.gluon import nn, rnn
-# pylint: disable=too-many-instance-attributes
-class LipNet(nn.HybridBlock):
-    """
-    Description : LipNet network using gluon
-    dr_rate : Dropout rate
-    """
-    def __init__(self, dr_rate, **kwargs):
-        super(LipNet, self).__init__(**kwargs)
-        with self.name_scope():
-            self.conv1 = nn.Conv3D(32, kernel_size=(3, 5, 5), strides=(1, 2, 2), padding=(1, 2, 2))
-            self.bn1 = nn.InstanceNorm(in_channels=32)
-            self.dr1 = nn.Dropout(dr_rate, axes=(1, 2))
-            self.pool1 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
-            self.conv2 = nn.Conv3D(64, kernel_size=(3, 5, 5), strides=(1, 1, 1), padding=(1, 2, 2))
-            self.bn2 = nn.InstanceNorm(in_channels=64)
-            self.dr2 = nn.Dropout(dr_rate, axes=(1, 2))
-            self.pool2 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
-            self.conv3 = nn.Conv3D(96, kernel_size=(3, 3, 3), strides=(1, 1, 1), padding=(1, 2, 2))
-            self.bn3 = nn.InstanceNorm(in_channels=96)
-            self.dr3 = nn.Dropout(dr_rate, axes=(1, 2))
-            self.pool3 = nn.MaxPool3D((1, 2, 2), (1, 2, 2))
-            self.gru1 = rnn.GRU(256, bidirectional=True)
-            self.gru2 = rnn.GRU(256, bidirectional=True)
-            self.dense = nn.Dense(27+1, flatten=False)
-
-    # pylint: disable=arguments-differ
-    def hybrid_forward(self, F, x):
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = F.relu(out)
-        out = self.dr1(out)
-        out = self.pool1(out)
-        out = self.conv2(out)
-        out = self.bn2(out)
-        out = F.relu(out)
-        out = self.dr2(out)
-        out = self.pool2(out)
-        out = self.conv3(out)
-        out = self.bn3(out)
-        out = F.relu(out)
-        out = self.dr3(out)
-        out = self.pool3(out)
-        out = F.transpose(out, (2, 0, 1, 3, 4))
-        # pylint: disable=no-member
-        out = out.reshape((0, 0, -1))
-        out = self.gru1(out)
-        out = self.gru2(out)
-        out = self.dense(out)
-        out = F.log_softmax(out, axis=2)
-        out = F.transpose(out, (1, 0, 2))
-        return out
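(A minimal smoke test of the removed network, assuming the 3x50x100 mouth crops and 75-frame sequences produced by the preprocessing step; the output is per-frame log-probabilities over 27 characters plus the CTC blank.)

```
import mxnet as mx
from models.network import LipNet

net = LipNet(dr_rate=0.5)
net.initialize(ctx=mx.cpu())
net.hybridize()

# Dummy batch in NCDHW layout: 2 clips of 75 RGB mouth frames, 50x100 pixels each.
x = mx.nd.random.uniform(shape=(2, 3, 75, 50, 100))
out = net(x)
print(out.shape)  # (2, 75, 28)
```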
diff --git a/example/gluon/lipnet/requirements.txt b/example/gluon/lipnet/requirements.txt
deleted file mode 100644
index f1fcda3..0000000
--- a/example/gluon/lipnet/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-dlib==19.15.0
-Pillow==4.1.0
-scipy==0.19.0
-scikit-image==0.13.1
-scikit-video==1.1.11
-sk-video==1.1.10
-tqdm
diff --git a/example/gluon/lipnet/tests/test_beamsearch.py b/example/gluon/lipnet/tests/test_beamsearch.py
deleted file mode 100644
index 069cbae..0000000
--- a/example/gluon/lipnet/tests/test_beamsearch.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""it is the test for the decode using beam search
-Ref:
-https://github.com/ThomasDelteil/HandwrittenTextRecognition_MXNet/blob/master/utils/CTCDecoder/BeamSearch.py
-"""
-
-import unittest
-import numpy as np
-from BeamSearch import ctcBeamSearch
-
-class TestBeamSearch(unittest.TestCase):
-    """Test Beam Search
-    """
-    def test_ctc_beam_search(self):
-        "test decoder"
-        classes = 'ab'
-        mat = np.array([[0.4, 0, 0.6], [0.4, 0, 0.6]])
-        print('Test beam search')
-        expected = 'a'
-        actual = ctcBeamSearch(mat, classes, None, k=2, beamWidth=3)[0]
-        print('Expected: "' + expected + '"')
-        print('Actual: "' + actual + '"')
-        self.assertEqual(expected, actual)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/example/gluon/lipnet/trainer.py b/example/gluon/lipnet/trainer.py
deleted file mode 100644
index df5c86e..0000000
--- a/example/gluon/lipnet/trainer.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Description : Training module for LipNet
-"""
-
-
-import sys
-import mxnet as mx
-from mxnet import gluon, autograd, nd
-from mxnet.gluon.data.vision import transforms
-from tqdm import tqdm, trange
-from data_loader import LipsDataset
-from models.network import LipNet
-from BeamSearch import ctcBeamSearch
-from utils.common import char_conv, int2char
-# set gpu count
-
-
-def setting_ctx(num_gpus):
-    """
-    Description : set gpu module
-    """
-    if num_gpus > 0:
-        ctx = [mx.gpu(i) for i in range(num_gpus)]
-    else:
-        ctx = [mx.cpu()]
-    return ctx
-
-
-ALPHABET = ''
-for i in range(27):
-    ALPHABET += int2char(i)
-
-def char_beam_search(out):
-    """
-    Description : apply beam search for prediction result
-    """
-    out_conv = list()
-    for idx in range(out.shape[0]):
-        probs = out[idx]
-        prob = probs.softmax().asnumpy()
-        line_string_proposals = ctcBeamSearch(prob, ALPHABET, None, k=4, beamWidth=25)
-        out_conv.append(line_string_proposals[0])
-    return out_conv
-
-# pylint: disable=too-many-instance-attributes, too-many-locals
-class Train:
-    """
-    Description : Train class for training network
-    """
-    def __init__(self, config):
-        ##setting hyper-parameters
-        self.batch_size = config.batch_size
-        self.image_path = config.image_path
-        self.align_path = config.align_path
-        self.num_gpus = config.num_gpus
-        self.ctx = setting_ctx(self.num_gpus)
-        self.num_workers = config.num_workers
-        self.seq_len = 75
-
-    def build_model(self, dr_rate=0, path=None):
-        """
-        Description : build network
-        """
-        #set network
-        self.net = LipNet(dr_rate)
-        self.net.hybridize()
-        self.net.initialize(ctx=self.ctx)
-
-        if path is not None:
-            self.load_model(path)
-
-        #set optimizer
-        self.loss_fn = gluon.loss.CTCLoss()
-        self.trainer = gluon.Trainer(self.net.collect_params(), \
-                                     optimizer='SGD')
-
-    def save_model(self, epoch, loss):
-        """
-        Description : save parameter of network weight
-        """
-        prefix = 'checkpoint/epoches'
-        file_name = "{prefix}_{epoch}_loss_{l:.4f}".format(prefix=prefix,
-                                                           epoch=str(epoch),
-                                                           l=loss)
-        self.net.save_parameters(file_name)
-
-    def load_model(self, path=''):
-        """
-        Description : load parameter of network weight
-        """
-        self.net.load_parameters(path)
-
-    def load_dataloader(self):
-        """
-        Description : Setup the dataloader
-        """
-
-        input_transform = transforms.Compose([transforms.ToTensor(), \
-                                             transforms.Normalize((0.7136, 0.4906, 0.3283), \
-                                                                  (0.1138, 0.1078, 0.0917))])
-        training_dataset = LipsDataset(self.image_path,
-                                       self.align_path,
-                                       mode='train',
-                                       transform=input_transform,
-                                       seq_len=self.seq_len)
-
-        self.train_dataloader = mx.gluon.data.DataLoader(training_dataset,
-                                                         batch_size=self.batch_size,
-                                                         shuffle=True,
-                                                         num_workers=self.num_workers)
-
-        valid_dataset = LipsDataset(self.image_path,
-                                    self.align_path,
-                                    mode='valid',
-                                    transform=input_transform,
-                                    seq_len=self.seq_len)
-
-        self.valid_dataloader = mx.gluon.data.DataLoader(valid_dataset,
-                                                         batch_size=self.batch_size,
-                                                         shuffle=True,
-                                                         num_workers=self.num_workers)
-
-    def train(self, data, label, batch_size):
-        """
-        Description : training for LipNet
-        """
-        # pylint: disable=no-member
-        sum_losses = 0
-        len_losses = 0
-        with autograd.record():
-            losses = [self.loss_fn(self.net(X), Y) for X, Y in zip(data, label)]
-        for loss in losses:
-            sum_losses += mx.nd.array(loss).sum().asscalar()
-            len_losses += len(loss)
-            loss.backward()
-        self.trainer.step(batch_size)
-        return sum_losses, len_losses
-
-    def infer(self, input_data, input_label):
-        """
-        Description : Print sentence for prediction result
-        """
-        sum_losses = 0
-        len_losses = 0
-        for data, label in zip(input_data, input_label):
-            pred = self.net(data)
-            sum_losses += mx.nd.array(self.loss_fn(pred, label)).sum().asscalar()
-            len_losses += len(data)
-            pred_convert = char_beam_search(pred)
-            label_convert = char_conv(label.asnumpy())
-            for target, pred in zip(label_convert, pred_convert):
-                print("target:{t}  pred:{p}".format(t=target, p=pred))
-        return sum_losses, len_losses
-
-    def train_batch(self, dataloader):
-        """
-        Description : training for LipNet
-        """
-        sum_losses = 0
-        len_losses = 0
-        for input_data, input_label in tqdm(dataloader):
-            data = gluon.utils.split_and_load(input_data, self.ctx, even_split=False)
-            label = gluon.utils.split_and_load(input_label, self.ctx, even_split=False)
-            batch_size = input_data.shape[0]
-            batch_sum_losses, batch_len_losses = self.train(data, label, batch_size)
-            sum_losses += batch_sum_losses
-            len_losses += batch_len_losses
-
-        return sum_losses, len_losses
-
-    def infer_batch(self, dataloader):
-        """
-        Description : inference for LipNet
-        """
-        sum_losses = 0
-        len_losses = 0
-        for input_data, input_label in dataloader:
-            data = gluon.utils.split_and_load(input_data, self.ctx, even_split=False)
-            label = gluon.utils.split_and_load(input_label, self.ctx, even_split=False)
-            batch_sum_losses, batch_len_losses = self.infer(data, label)
-            sum_losses += batch_sum_losses
-            len_losses += batch_len_losses
-
-        return sum_losses, len_losses
-
-    def run(self, epochs):
-        """
-        Description : Run training for LipNet
-        """
-        best_loss = sys.maxsize
-        for epoch in trange(epochs):
-            iter_no = 0
-
-            ## train
-            sum_losses, len_losses = self.train_batch(self.train_dataloader)
-
-            if iter_no % 20 == 0:
-                current_loss = sum_losses / len_losses
-                print("[Train] epoch:{e} iter:{i} loss:{l:.4f}".format(e=epoch,
-                                                                       i=iter_no,
-                                                                       l=current_loss))
-
-            ## validating
-            sum_val_losses, len_val_losses = self.infer_batch(self.valid_dataloader)
-
-            current_val_loss = sum_val_losses / len_val_losses
-            print("[Vaild] epoch:{e} iter:{i} loss:{l:.4f}".format(e=epoch,
-                                                                   i=iter_no,
-                                                                   l=current_val_loss))
-
-            if best_loss > current_val_loss:
-                self.save_model(epoch, current_val_loss)
-                best_loss = current_val_loss
-
-            iter_no += 1
diff --git a/example/gluon/lipnet/utils/__init__.py b/example/gluon/lipnet/utils/__init__.py
deleted file mode 100644
index 13a8339..0000000
--- a/example/gluon/lipnet/utils/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/example/gluon/lipnet/utils/align.py b/example/gluon/lipnet/utils/align.py
deleted file mode 100644
index 48d0716..0000000
--- a/example/gluon/lipnet/utils/align.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module: align
-This is used when the data is generated by LipsDataset
-"""
-
-import numpy as np
-from .common import word_to_vector
-
-
-class Align(object):
-    """
-    Preprocess for Align
-    """
-    skip_list = ['sil', 'sp']
-
-    def __init__(self, align_path):
-        self.build(align_path)
-
-    def build(self, align_path):
-        """
-        Build the align array
-        """
-        file = open(align_path, 'r')
-        lines = file.readlines()
-        file.close()
-        # words: list([op, ed, word])
-        words = []
-        for line in lines:
-            _op, _ed, word = line.strip().split(' ')
-            if word not in Align.skip_list:
-                words.append((int(_op), int(_ed), word))
-        self.words = words
-        self.n_words = len(words)
-        self.sentence_str = " ".join([w[2] for w in self.words])
-        self.sentence_length = len(self.sentence_str)
-
-    def sentence(self, padding=75):
-        """
-        Get sentence
-        """
-        vec = word_to_vector(self.sentence_str)
-        vec += [-1] * (padding - self.sentence_length)
-        return np.array(vec, dtype=np.int32)
-
-    def word(self, _id, padding=75):
-        """
-        Get words
-        """
-        word = self.words[_id][2]
-        vec = word_to_vector(word)
-        vec += [-1] * (padding - len(vec))
-        return np.array(vec, dtype=np.int32)
-
-    def word_length(self, _id):
-        """
-        Get the length of words
-        """
-        return len(self.words[_id][2])
-
-    def word_frame_pos(self, _id):
-        """
-        Get the position of words
-        """
-        left = int(self.words[_id][0]/1000)
-        right = max(left+1, int(self.words[_id][1]/1000))
-        return (left, right)
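(A minimal sketch of the removed Align helper on a hypothetical align file matching the README's 'bin blue by f seven please' example; the temporary path is illustrative only.)

```
from utils.align import Align

# A GRID align file has one "<start> <end> <word>" triple per line; 'sil' and 'sp' are skipped.
with open('/tmp/bbbf7p.align', 'w') as f:
    f.write('0 500 sil\n500 1000 bin\n1000 1500 blue\n1500 2000 by\n'
            '2000 2500 f\n2500 3000 seven\n3000 3500 please\n3500 4000 sil\n')

align = Align('/tmp/bbbf7p.align')
print(align.sentence_str)          # 'bin blue by f seven please'
print(align.sentence(padding=75))  # integer vector padded with -1 up to length 75
```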
diff --git a/example/gluon/lipnet/utils/common.py b/example/gluon/lipnet/utils/common.py
deleted file mode 100644
index ec96b68..0000000
--- a/example/gluon/lipnet/utils/common.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module: This module contains common conversion functions
-
-"""
-
-
-def char2int(char):
-    """
-    Convert character to integer.
-    """
-    if char >= 'a' and char <= 'z':
-        return ord(char) - ord('a')
-    elif char == ' ':
-        return 26
-    return None
-
-
-def int2char(num):
-    """
-    Convert integer to character.
-    """
-    if num >= 0 and num < 26:
-        return chr(num + ord('a'))
-    elif num == 26:
-        return ' '
-    return None
-
-
-def word_to_vector(word):
-    """
-    Convert character vectors to integer vectors.
-    """
-    vector = []
-    for char in list(word):
-        vector.append(char2int(char))
-    return vector
-
-
-def vector_to_word(vector):
-    """
-    Convert integer vectors to character vectors.
-    """
-    word = ""
-    for vec in vector:
-        word = word + int2char(vec)
-    return word
-
-
-def char_conv(out):
-    """
-    Convert integer vectors to character vectors for batch.
-    """
-    out_conv = list()
-    for i in range(out.shape[0]):
-        tmp_str = ''
-        for j in range(out.shape[1]):
-            if int(out[i][j]) >= 0:
-                tmp_char = int2char(int(out[i][j]))
-                if int(out[i][j]) == 27:
-                    tmp_char = ''
-                tmp_str = tmp_str + tmp_char
-        out_conv.append(tmp_str)
-    return out_conv
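(A quick illustration of the removed conversion helpers; characters a-z map to 0-25 and the space maps to 26, which is also how the CTC labels in this example are encoded.)

```
from utils.common import char2int, int2char, word_to_vector, vector_to_word

print(char2int('a'), char2int('z'), char2int(' '))  # 0 25 26
print(int2char(0), repr(int2char(26)))              # a ' '

vec = word_to_vector('bin blue')
print(vec)                  # [1, 8, 13, 26, 1, 11, 20, 4]
print(vector_to_word(vec))  # 'bin blue'
```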
diff --git a/example/gluon/lipnet/utils/download_data.py b/example/gluon/lipnet/utils/download_data.py
deleted file mode 100644
index 3051eb2..0000000
--- a/example/gluon/lipnet/utils/download_data.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module: download_data
-This module provides utilities for downloading the datasets for training LipNet
-"""
-
-import os
-from os.path import exists
-from multi import multi_p_run, put_worker
-
-
-def download_mp4(from_idx, to_idx, _params):
-    """
-    download mp4s
-    """
-    succ = set()
-    fail = set()
-    for idx in range(from_idx, to_idx):
-        name = 's' + str(idx)
-        save_folder = '{src_path}/{nm}'.format(src_path=_params['src_path'], nm=name)
-        if idx == 0 or os.path.isdir(save_folder):
-            continue
-        script = "http://spandh.dcs.shef.ac.uk/gridcorpus/{nm}/video/{nm}.mpg_vcd.zip".format( \
-                    nm=name)
-        down_sc = 'cd {src_path} && curl {script} --output {nm}.mpg_vcd.zip && \
-                    unzip {nm}.mpg_vcd.zip'.format(script=script,
-                                                   nm=name,
-                                                   src_path=_params['src_path'])
-        try:
-            print(down_sc)
-            os.system(down_sc)
-            succ.add(idx)
-        except OSError as error:
-            print(error)
-            fail.add(idx)
-    return (succ, fail)
-
-
-def download_align(from_idx, to_idx, _params):
-    """
-    download aligns
-    """
-    succ = set()
-    fail = set()
-    for idx in range(from_idx, to_idx):
-        name = 's' + str(idx)
-        if idx == 0:
-            continue
-        script = "http://spandh.dcs.shef.ac.uk/gridcorpus/{nm}/align/{nm}.tar".format(nm=name)
-        down_sc = 'cd {align_path} && wget {script} && \
-                    tar -xvf {nm}.tar'.format(script=script,
-                                              nm=name,
-                                              align_path=_params['align_path'])
-        try:
-            print(down_sc)
-            os.system(down_sc)
-            succ.add(idx)
-        except OSError as error:
-            print(error)
-            fail.add(idx)
-    return (succ, fail)
-
-
-if __name__ == '__main__':
-    import argparse
-    PARSER = argparse.ArgumentParser()
-    PARSER.add_argument('--src_path', type=str, default='../data/mp4s')
-    PARSER.add_argument('--align_path', type=str, default='../data')
-    PARSER.add_argument('--n_process', type=int, default=1)
-    CONFIG = PARSER.parse_args()
-    PARAMS = {'src_path': CONFIG.src_path, 'align_path': CONFIG.align_path}
-    N_PROCESS = CONFIG.n_process
-
-    if exists('./shape_predictor_68_face_landmarks.dat') is False:
-        os.system('wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 && \
-                  bzip2 -d shape_predictor_68_face_landmarks.dat.bz2')
-
-    os.makedirs('{src_path}'.format(src_path=PARAMS['src_path']), exist_ok=True)
-    os.makedirs('{align_path}'.format(align_path=PARAMS['align_path']), exist_ok=True)
-
-    if N_PROCESS == 1:
-        RES = download_mp4(0, 35, PARAMS)
-        RES = download_align(0, 35, PARAMS)
-    else:
-        # download movie files
-        RES = multi_p_run(tot_num=35, _func=put_worker, worker=download_mp4, \
-                          params=PARAMS, n_process=N_PROCESS)
-
-        # download align files
-        RES = multi_p_run(tot_num=35, _func=put_worker, worker=download_align, \
-                          params=PARAMS, n_process=N_PROCESS)
-
-    os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format( \
-              src_path=PARAMS['src_path']))
-    os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format( \
-              align_path=PARAMS['align_path']))
diff --git a/example/gluon/lipnet/utils/multi.py b/example/gluon/lipnet/utils/multi.py
deleted file mode 100644
index ce545b5..0000000
--- a/example/gluon/lipnet/utils/multi.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module: preprocess with multi-process
-"""
-
-
-def multi_p_run(tot_num, _func, worker, params, n_process):
-    """
-    Run _func with multi-process using params.
-    """
-    from multiprocessing import Process, Queue
-    out_q = Queue()
-    procs = []
-
-    split_num = split_seq(list(range(0, tot_num)), n_process)
-
-    print(tot_num, ">>", split_num)
-
-    split_len = len(split_num)
-    if n_process > split_len:
-        n_process = split_len
-
-    for i in range(n_process):
-        _p = Process(target=_func,
-                     args=(worker, split_num[i][0], split_num[i][1],
-                           params, out_q))
-        _p.daemon = True
-        procs.append(_p)
-        _p.start()
-
-    try:
-        result = []
-        for i in range(n_process):
-            result.append(out_q.get())
-        for i in procs:
-            i.join()
-    except KeyboardInterrupt:
-        print('Killing all the children in the pool.')
-        for i in procs:
-            i.terminate()
-            i.join()
-        return -1
-
-    while not out_q.empty():
-        print(out_q.get(block=False))
-
-    return result
-
-
-def split_seq(sam_num, n_tile):
-    """
-    Split the number(sam_num) into numbers by n_tile
-    """
-    import math
-    print(sam_num)
-    print(n_tile)
-    start_num = sam_num[0::int(math.ceil(len(sam_num) / (n_tile)))]
-    end_num = start_num[1::]
-    end_num.append(len(sam_num))
-    return [[i, j] for i, j in zip(start_num, end_num)]
-
-
-def put_worker(func, from_idx, to_idx, params, out_q):
-    """
-    put worker
-    """
-    succ, fail = func(from_idx, to_idx, params)
-    return out_q.put({'succ': succ, 'fail': fail})
-
-
-def test_worker(from_idx, to_idx, params):
-    """
-    the worker to test multi-process
-    """
-    params = params
-    succ = set()
-    fail = set()
-    for idx in range(from_idx, to_idx):
-        try:
-            succ.add(idx)
-        except ValueError:
-            fail.add(idx)
-    return (succ, fail)
-
-
-if __name__ == '__main__':
-    RES = multi_p_run(35, put_worker, test_worker, params={}, n_process=5)
-    print(RES)
diff --git a/example/gluon/lipnet/utils/preprocess_data.py b/example/gluon/lipnet/utils/preprocess_data.py
deleted file mode 100644
index a13fad8..0000000
--- a/example/gluon/lipnet/utils/preprocess_data.py
+++ /dev/null
@@ -1,262 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Module: preprocess_data
-Reference: https://github.com/rizkiarm/LipNet
-"""
-
-# pylint: disable=too-many-locals, no-self-use, c-extension-no-member
-
-import os
-import fnmatch
-import errno
-import numpy as np
-from scipy import ndimage
-from scipy.misc import imresize
-from skimage import io
-import skvideo.io
-import dlib
-
-def mkdir_p(path):
-    """
-    Make a directory
-    """
-    try:
-        os.makedirs(path)
-    except OSError as exc:  # Python >2.5
-        if exc.errno == errno.EEXIST and os.path.isdir(path):
-            pass
-        else:
-            raise
-
-def find_files(directory, pattern):
-    """
-    Find files
-    """
-    for root, _, files in os.walk(directory):
-        for basename in files:
-            if fnmatch.fnmatch(basename, pattern):
-                filename = os.path.join(root, basename)
-                yield filename
-
-class Video(object):
-    """
-    Preprocess for Video
-    """
-    def __init__(self, vtype='mouth', face_predictor_path=None):
-        if vtype == 'face' and face_predictor_path is None:
-            raise AttributeError('Face video needs to be accompanied by a face predictor')
-        self.face_predictor_path = face_predictor_path
-        self.vtype = vtype
-        self.face = None
-        self.mouth = None
-        self.data = None
-        self.length = None
-
-    def from_frames(self, path):
-        """
-        Read from frames
-        """
-        frames_path = sorted([os.path.join(path, x) for x in os.listdir(path)])
-        frames = [ndimage.imread(frame_path) for frame_path in frames_path]
-        self.handle_type(frames)
-        return self
-
-    def from_video(self, path):
-        """
-        Read from videos
-        """
-        frames = self.get_video_frames(path)
-        self.handle_type(frames)
-        return self
-
-    def from_array(self, frames):
-        """
-        Read from array
-        """
-        self.handle_type(frames)
-        return self
-
-    def handle_type(self, frames):
-        """
-        Config video types
-        """
-        if self.vtype == 'mouth':
-            self.process_frames_mouth(frames)
-        elif self.vtype == 'face':
-            self.process_frames_face(frames)
-        else:
-            raise Exception('Video type not found')
-
-    def process_frames_face(self, frames):
-        """
-        Preprocess from frames using face detector
-        """
-        detector = dlib.get_frontal_face_detector()
-        predictor = dlib.shape_predictor(self.face_predictor_path)
-        mouth_frames = self.get_frames_mouth(detector, predictor, frames)
-        self.face = np.array(frames)
-        self.mouth = np.array(mouth_frames)
-        if mouth_frames[0] is not None:
-            self.set_data(mouth_frames)
-
-    def process_frames_mouth(self, frames):
-        """
-        Preprocess from frames using mouth detector
-        """
-        self.face = np.array(frames)
-        self.mouth = np.array(frames)
-        self.set_data(frames)
-
-    def get_frames_mouth(self, detector, predictor, frames):
-        """
-        Get frames using mouth crop
-        """
-        mouth_width = 100
-        mouth_height = 50
-        horizontal_pad = 0.19
-        normalize_ratio = None
-        mouth_frames = []
-        for frame in frames:
-            dets = detector(frame, 1)
-            shape = None
-            for det in dets:
-                shape = predictor(frame, det)
-                i = -1
-            if shape is None: # Detector doesn't detect face, just return None
-                return [None]
-            mouth_points = []
-            for part in shape.parts():
-                i += 1
-                if i < 48: # Only take mouth region
-                    continue
-                mouth_points.append((part.x, part.y))
-            np_mouth_points = np.array(mouth_points)
-
-            mouth_centroid = np.mean(np_mouth_points[:, -2:], axis=0)
-
-            if normalize_ratio is None:
-                mouth_left = np.min(np_mouth_points[:, :-1]) * (1.0 - horizontal_pad)
-                mouth_right = np.max(np_mouth_points[:, :-1]) * (1.0 + horizontal_pad)
-
-                normalize_ratio = mouth_width / float(mouth_right - mouth_left)
-
-            new_img_shape = (int(frame.shape[0] * normalize_ratio),
-                             int(frame.shape[1] * normalize_ratio))
-            resized_img = imresize(frame, new_img_shape)
-
-            mouth_centroid_norm = mouth_centroid * normalize_ratio
-
-            mouth_l = int(mouth_centroid_norm[0] - mouth_width / 2)
-            mouth_r = int(mouth_centroid_norm[0] + mouth_width / 2)
-            mouth_t = int(mouth_centroid_norm[1] - mouth_height / 2)
-            mouth_b = int(mouth_centroid_norm[1] + mouth_height / 2)
-
-            mouth_crop_image = resized_img[mouth_t:mouth_b, mouth_l:mouth_r]
-
-            mouth_frames.append(mouth_crop_image)
-        return mouth_frames
-
-    def get_video_frames(self, path):
-        """
-        Get video frames
-        """
-        videogen = skvideo.io.vreader(path)
-        frames = np.array([frame for frame in videogen])
-        return frames
-
-    def set_data(self, frames):
-        """
-        Prepare the input of model
-        """
-        data_frames = []
-        for frame in frames:
-            #frame H x W x C
-            frame = frame.swapaxes(0, 1) # swap width and height to form format W x H x C
-            if len(frame.shape) < 3:
-                frame = np.array([frame]).swapaxes(0, 2).swapaxes(0, 1) # Add grayscale channel
-            data_frames.append(frame)
-        frames_n = len(data_frames)
-        data_frames = np.array(data_frames) # T x W x H x C
-        data_frames = np.rollaxis(data_frames, 3) # C x T x W x H
-        data_frames = data_frames.swapaxes(2, 3) # C x T x H x W  = NCDHW
-
-        self.data = data_frames
-        self.length = frames_n
-
-def preprocess(from_idx, to_idx, _params):
-    """
-    Preprocess: Convert a video into the mouth images
-    """
-    source_exts = '*.mpg'
-    src_path = _params['src_path']
-    tgt_path = _params['tgt_path']
-    face_predictor_path = './shape_predictor_68_face_landmarks.dat'
-
-    succ = set()
-    fail = set()
-    for idx in range(from_idx, to_idx):
-        s_id = 's' + str(idx) + '/'
-        source_path = src_path + '/' + s_id
-        target_path = tgt_path + '/' + s_id
-        fail_cnt = 0
-        for filepath in find_files(source_path, source_exts):
-            print("Processing: {}".format(filepath))
-            filepath_wo_ext = os.path.splitext(filepath)[0].split('/')[-2:]
-            target_dir = os.path.join(tgt_path, '/'.join(filepath_wo_ext))
-
-            if os.path.exists(target_dir):
-                continue
-
-            try:
-                video = Video(vtype='face', \
-                                face_predictor_path=face_predictor_path).from_video(filepath)
-                mkdir_p(target_dir)
-                i = 0
-                if video.mouth[0] is None:
-                    continue
-                for frame in video.mouth:
-                    io.imsave(os.path.join(target_dir, "mouth_{0:03d}.png".format(i)), frame)
-                    i += 1
-            except ValueError as error:
-                print(error)
-                fail_cnt += 1
-        if fail_cnt == 0:
-            succ.add(idx)
-        else:
-            fail.add(idx)
-    return (succ, fail)
-
-if __name__ == '__main__':
-    import argparse
-    from multi import multi_p_run, put_worker
-    PARSER = argparse.ArgumentParser()
-    PARSER.add_argument('--src_path', type=str, default='../data/mp4s')
-    PARSER.add_argument('--tgt_path', type=str, default='../data/datasets')
-    PARSER.add_argument('--n_process', type=int, default=1)
-    CONFIG = PARSER.parse_args()
-    N_PROCESS = CONFIG.n_process
-    PARAMS = {'src_path':CONFIG.src_path,
-              'tgt_path':CONFIG.tgt_path}
-
-    os.makedirs('{tgt_path}'.format(tgt_path=PARAMS['tgt_path']), exist_ok=True)
-
-    if N_PROCESS == 1:
-        RES = preprocess(0, 35, PARAMS)
-    else:
-        RES = multi_p_run(35, put_worker, preprocess, PARAMS, N_PROCESS)
diff --git a/example/gluon/lipnet/utils/run_preprocess.ipynb b/example/gluon/lipnet/utils/run_preprocess.ipynb
deleted file mode 100644
index 7a25e9b..0000000
--- a/example/gluon/lipnet/utils/run_preprocess.ipynb
+++ /dev/null
@@ -1,194 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "from download_data import multi_p_run, put_worker, _worker, download_mp4, download_align"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## TEST"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]\n",
-      "5\n",
-      "35 >> [[0, 7], [7, 14], [14, 21], [21, 28], [28, 35]]\n",
-      "[{'succ': {0, 1, 2, 3, 4, 5, 6}, 'fail': set()}, {'succ': {7, 8, 9, 10, 11, 12, 13}, 'fail': set()}, {'succ': {14, 15, 16, 17, 18, 19, 20}, 'fail': set()}, {'succ': {21, 22, 23, 24, 25, 26, 27}, 'fail': set()}, {'succ': {32, 33, 34, 28, 29, 30, 31}, 'fail': set()}]\n"
-     ]
-    }
-   ],
-   "source": [
-    "res = multi_p_run(35, put_worker, _worker, 5)\n",
-    "print (res)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Download Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## down\n",
-    "import os\n",
-    "os.makedirs('./datasets', exist_ok=True)\n",
-    "#os.system('rm -rf ./datasets/*')\n",
-    "\n",
-    "res = multi_p_run(35, put_worker, download_align, 9)\n",
-    "print (res)\n",
-    "\n",
-    "os.system('rm -f datasets/*.tar && rm -f datasets/align/Thumbs.db')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "res = multi_p_run(35, put_worker, download_mp4, 9)\n",
-    "print (res)\n",
-    "\n",
-    "os.system('rm -f datasets/*.zip && rm -f datasets/*/Thumbs.db')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## download single 22 th dir\n",
-    "#download_data.py(22, 22)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Preprocess Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from preprocess_data import preprocess, find_files, Video"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import os\n",
-    "os.makedirs('./TARGET', exist_ok=True)\n",
-    "os.system('rm -rf ./TARGET/*')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]\n",
-      "9\n",
-      "35 >> [[0, 4], [4, 8], [8, 12], [12, 16], [16, 20], [20, 24], [24, 28], [28, 32], [32, 35]]\n",
-      "Processing: datasets/s1/prwq3s.mpg\n",
-      "Processing: datasets/s4/lrix7n.mpg\n",
-      "Processing: datasets/s8/pgbyza.mpg\n",
-      "Processing: datasets/s12/brik7n.mpg\n",
-      "Processing: datasets/s16/sgit7p.mpg\n",
-      "Processing: datasets/s20/lrbp8a.mpg\n",
-      "Processing: datasets/s24/sbik8a.mpg\n",
-      "Processing: datasets/s28/srwf8a.mpg\n",
-      "Processing: datasets/s32/pbbm1n.mpg\n",
-      "Processing: datasets/s12/sbbaza.mpg\n",
-      "Processing: datasets/s28/lbit7n.mpg\n",
-      "Processing: datasets/s32/pbwm7p.mpg\n",
-      "Processing: datasets/s8/bril2s.mpg\n",
-      "Processing: datasets/s20/bway7n.mpg\n",
-      "Processing: datasets/s1/pbib8p.mpg\n",
-      "Processing: datasets/s16/lwaj7n.mpg\n",
-      "Processing: datasets/s24/bwwl6a.mpg\n",
-      "Processing: datasets/s4/bbwf7n.mpg\n"
-     ]
-    }
-   ],
-   "source": [
-    "res = multi_p_run(35, put_worker, preprocess, 9)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/example/gluon/lipnet/utils/run_preprocess_single_process.ipynb b/example/gluon/lipnet/utils/run_preprocess_single_process.ipynb
deleted file mode 100644
index 4311323..0000000
--- a/example/gluon/lipnet/utils/run_preprocess_single_process.ipynb
+++ /dev/null
@@ -1,360 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "from download_data import multi_p_run, put_worker, test_worker, download_mp4, download_align"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tot_movies=35"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## TEST"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]\n",
-      "5\n",
-      "35 >> [[0, 7], [7, 14], [14, 21], [21, 28], [28, 35]]\n",
-      "[{'succ': {0, 1, 2, 3, 4, 5, 6}, 'fail': set()}, {'succ': {7, 8, 9, 10, 11, 12, 13}, 'fail': set()}, {'succ': {14, 15, 16, 17, 18, 19, 20}, 'fail': set()}, {'succ': {21, 22, 23, 24, 25, 26, 27}, 'fail': set()}, {'succ': {32, 33, 34, 28, 29, 30, 31}, 'fail': set()}]\n"
-     ]
-    }
-   ],
-   "source": [
-    "res = multi_p_run(tot_movies, put_worker, test_worker, params={}, n_process=5)\n",
-    "print (res)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Download Data"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Aligns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s0/align/s0.tar && tar -xvf s0.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s1/align/s1.tar && tar -xvf s1.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s2/align/s2.tar && tar -xvf s2.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s3/align/s3.tar && tar -xvf s3.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s4/align/s4.tar && tar -xvf s4.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s5/align/s5.tar && tar -xvf s5.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s6/align/s6.tar && tar -xvf s6.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s7/align/s7.tar && tar -xvf s7.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s8/align/s8.tar && tar -xvf s8.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s9/align/s9.tar && tar -xvf s9.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s10/align/s10.tar && tar -xvf s10.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s11/align/s11.tar && tar -xvf s11.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s12/align/s12.tar && tar -xvf s12.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s13/align/s13.tar && tar -xvf s13.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s14/align/s14.tar && tar -xvf s14.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s15/align/s15.tar && tar -xvf s15.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s16/align/s16.tar && tar -xvf s16.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s17/align/s17.tar && tar -xvf s17.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s18/align/s18.tar && tar -xvf s18.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s19/align/s19.tar && tar -xvf s19.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s20/align/s20.tar && tar -xvf s20.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s21/align/s21.tar && tar -xvf s21.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s22/align/s22.tar && tar -xvf s22.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s23/align/s23.tar && tar -xvf s23.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s24/align/s24.tar && tar -xvf s24.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s25/align/s25.tar && tar -xvf s25.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s26/align/s26.tar && tar -xvf s26.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s27/align/s27.tar && tar -xvf s27.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s28/align/s28.tar && tar -xvf s28.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s29/align/s29.tar && tar -xvf s29.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s30/align/s30.tar && tar -xvf s30.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s31/align/s31.tar && tar -xvf s31.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s32/align/s32.tar && tar -xvf s32.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s33/align/s33.tar && tar -xvf s33.tar\n",
-      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s34/align/s34.tar && tar -xvf s34.tar\n"
-     ]
-    }
-   ],
-   "source": [
-    "align_path = '../data/align'\n",
-    "os.makedirs(align_path, exist_ok=True)\n",
-    "\n",
-    "res = download_align(0, tot_movies, {'align_path':align_path})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "print (res)\n",
-    "os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format(align_path=align_path))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "### Moives(MP4s)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s0/video/s0.mpg_vcd.zip --output s0.mpg_vcd.zip && unzip s0.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s1/video/s1.mpg_vcd.zip --output s1.mpg_vcd.zip && unzip s1.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s2/video/s2.mpg_vcd.zip --output s2.mpg_vcd.zip && unzip s2.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s3/video/s3.mpg_vcd.zip --output s3.mpg_vcd.zip && unzip s3.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s4/video/s4.mpg_vcd.zip --output s4.mpg_vcd.zip && unzip s4.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s5/video/s5.mpg_vcd.zip --output s5.mpg_vcd.zip && unzip s5.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s6/video/s6.mpg_vcd.zip --output s6.mpg_vcd.zip && unzip s6.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s7/video/s7.mpg_vcd.zip --output s7.mpg_vcd.zip && unzip s7.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s8/video/s8.mpg_vcd.zip --output s8.mpg_vcd.zip && unzip s8.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s9/video/s9.mpg_vcd.zip --output s9.mpg_vcd.zip && unzip s9.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s10/video/s10.mpg_vcd.zip --output s10.mpg_vcd.zip && unzip s10.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s11/video/s11.mpg_vcd.zip --output s11.mpg_vcd.zip && unzip s11.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s12/video/s12.mpg_vcd.zip --output s12.mpg_vcd.zip && unzip s12.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s13/video/s13.mpg_vcd.zip --output s13.mpg_vcd.zip && unzip s13.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s14/video/s14.mpg_vcd.zip --output s14.mpg_vcd.zip && unzip s14.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s15/video/s15.mpg_vcd.zip --output s15.mpg_vcd.zip && unzip s15.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s16/video/s16.mpg_vcd.zip --output s16.mpg_vcd.zip && unzip s16.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s17/video/s17.mpg_vcd.zip --output s17.mpg_vcd.zip && unzip s17.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s18/video/s18.mpg_vcd.zip --output s18.mpg_vcd.zip && unzip s18.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s19/video/s19.mpg_vcd.zip --output s19.mpg_vcd.zip && unzip s19.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s20/video/s20.mpg_vcd.zip --output s20.mpg_vcd.zip && unzip s20.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s21/video/s21.mpg_vcd.zip --output s21.mpg_vcd.zip && unzip s21.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s22/video/s22.mpg_vcd.zip --output s22.mpg_vcd.zip && unzip s22.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s23/video/s23.mpg_vcd.zip --output s23.mpg_vcd.zip && unzip s23.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s24/video/s24.mpg_vcd.zip --output s24.mpg_vcd.zip && unzip s24.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s25/video/s25.mpg_vcd.zip --output s25.mpg_vcd.zip && unzip s25.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s26/video/s26.mpg_vcd.zip --output s26.mpg_vcd.zip && unzip s26.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s27/video/s27.mpg_vcd.zip --output s27.mpg_vcd.zip && unzip s27.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s28/video/s28.mpg_vcd.zip --output s28.mpg_vcd.zip && unzip s28.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s29/video/s29.mpg_vcd.zip --output s29.mpg_vcd.zip && unzip s29.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s30/video/s30.mpg_vcd.zip --output s30.mpg_vcd.zip && unzip s30.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s31/video/s31.mpg_vcd.zip --output s31.mpg_vcd.zip && unzip s31.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s32/video/s32.mpg_vcd.zip --output s32.mpg_vcd.zip && unzip s32.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s33/video/s33.mpg_vcd.zip --output s33.mpg_vcd.zip && unzip s33.mpg_vcd.zip\n",
-      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s34/video/s34.mpg_vcd.zip --output s34.mpg_vcd.zip && unzip s34.mpg_vcd.zip\n"
-     ]
-    }
-   ],
-   "source": [
-    "src_path = '../data/mp4s'\n",
-    "res = download_mp4(0, tot_movies, {'src_path':src_path})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "print (res)\n",
-    "os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format(src_path=src_path))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Preprocess Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from preprocess_data import preprocess, find_files, Video"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tgt_path = '../data/datasets'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "os.makedirs('{tgt_path}'.format(tgt_path=tgt_path), exist_ok=True)\n",
-    "os.system('rm -rf {tgt_path}'.format(tgt_path=tgt_path))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "res = preprocess(0, tot_movies, {'src_path':src_path, 'tgt_path':tgt_path})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
-     ]
-    }
-   ],
-   "source": [
-    "print (res)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python [default]",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/example/gluon/lstm_crf/README.md b/example/gluon/lstm_crf/README.md
deleted file mode 100644
index 519c3b8..0000000
--- a/example/gluon/lstm_crf/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# BiLSTM CRF model
-This example demonstrates how a [BiLSTM-CRF model](https://arxiv.org/pdf/1508.01991v1.pdf) can be implemented in Gluon to perform noun-phrase chunking as a sequence labeling task.  In this example we define the following training sample:
-```
-georgia tech is a university in georgia
-B I O O O O B
-```
-The second line is the IOB representation of the sentence above, which the model learns to predict. **I** stands for inside a chunk, **O** for outside a chunk and **B** for the beginning of a chunk.
-
-The model consists of an LSTM layer with 2 hidden units and a CRF layer. The CRF layer has a state transition matrix which allows the model to take past and future tags into account when predicting the current tag. The bidirectional LSTM reads the word sequence from beginning to end and vice versa and produces a vector representation for each word. The following image is taken from https://arxiv.org/pdf/1508.01991v1.pdf and shows the model architecture (a toy scoring sketch follows the figure):
-
-![Image taken from https://arxiv.org/pdf/1508.01991v1.pdf](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/gluon/lstm_crf/bi-lstm_crf.png)
-
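-As a toy illustration of what the CRF layer adds (made-up numbers, not the API used in `lstm_crf.py`), the score of a tag sequence is the sum of the per-word emission scores from the BiLSTM plus the learned transition scores between consecutive tags:
-
-```python
-import numpy as np
-
-# index 0 = B, 1 = I, 2 = O (the real example also adds START/STOP tags)
-emissions = np.array([[2.0, 0.1, 0.3],   # one row of BiLSTM tag scores per word
-                      [0.2, 1.5, 0.4],
-                      [0.1, 0.3, 1.8]])
-transitions = np.random.randn(3, 3)      # transitions[i, j]: score of moving *to* tag i *from* tag j
-
-def sequence_score(tag_ids):
-    """Sum of emission scores of the chosen tags plus transition scores between neighbours."""
-    score = emissions[0, tag_ids[0]]
-    for t in range(1, len(tag_ids)):
-        score += emissions[t, tag_ids[t]] + transitions[tag_ids[t], tag_ids[t - 1]]
-    return score
-
-print(sequence_score([0, 1, 2]))         # score of the path B -> I -> O
-```
-
-Training pushes the score of the gold path above the log-sum of all path scores (computed by the forward algorithm), and Viterbi decoding returns the highest-scoring path at prediction time.
-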
-You can run the example by executing 
-```
-python lstm_crf.py
-```
-The example code does not take any command-line arguments. If you want to change the number of hidden units or the size of the word embeddings, change the variables ```EMBEDDING_DIM``` and ```HIDDEN_DIM``` in `lstm_crf.py`.
-
-
diff --git a/example/gluon/lstm_crf/lstm_crf.py b/example/gluon/lstm_crf/lstm_crf.py
deleted file mode 100644
index 6cdc6e9..0000000
--- a/example/gluon/lstm_crf/lstm_crf.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""This example demonstrates how the LSTM-CRF model can be implemented
-in Gluon to perform noun-phrase chunking as a sequence labeling task.
-"""
-import sys
-import mxnet as mx
-from mxnet import autograd as ag, ndarray as nd, gluon
-from mxnet.gluon import Block, nn, rnn
-import mxnet.optimizer as optim
-
-mx.random.seed(1)
-
-
-# Helper functions to make the code more readable.
-def to_scalar(x):
-    return int(x.asscalar())
-
-
-def argmax(vec):
-    # return the argmax as a python int
-    idx = nd.argmax(vec, axis=1)
-    return to_scalar(idx)
-
-
-def prepare_sequence(seq, word2Idx):
-    return nd.array([word2Idx[w] for w in seq])
-
-
-# Computing log-sum-exp this way (subtracting the max first) is numerically more stable
-# than summing the exponentiated scores directly: e.g. exp(-1000) underflows to 0, while
-# log_sum_exp(nd.array([-1000., -1000.])) still returns -1000 + log(2).
-def log_sum_exp(vec):
-    max_score = nd.max(vec).asscalar()
-    return nd.log(nd.sum(nd.exp(vec - max_score))) + max_score
-
-
-# Model
-class BiLSTM_CRF(Block):
-    """Get BiLSTM_CRF model"""
-    def __init__(self, vocab_size, tag2Idx, embedding_dim, hidden_dim):
-        super(BiLSTM_CRF, self).__init__()
-        with self.name_scope():
-            self.embedding_dim = embedding_dim
-            self.hidden_dim = hidden_dim
-            self.vocab_size = vocab_size
-            self.tag2idx = tag2Idx
-            self.tagset_size = len(tag2Idx)
-            self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
-            self.lstm = rnn.LSTM(hidden_dim // 2, num_layers=1, bidirectional=True)
-
-            # Maps the output of the LSTM into tag space.
-            self.hidden2tag = nn.Dense(self.tagset_size)
-
-            # Matrix of transition parameters.  Entry i,j is the score of
-            # transitioning *to* i *from* j.
-            self.transitions = self.params.get("crf_transition_matrix", shape=(self.tagset_size, self.tagset_size))
-            self.hidden = self.init_hidden()
-
-    def init_hidden(self):
-        return [nd.random.normal(shape=(2, 1, self.hidden_dim // 2)),
-                nd.random.normal(shape=(2, 1, self.hidden_dim // 2))]
-
-    def _forward_alg(self, feats):
-        # Do the forward algorithm to compute the partition function
-        alphas = [[-10000.] * self.tagset_size]
-        alphas[0][self.tag2idx[START_TAG]] = 0.
-        alphas = nd.array(alphas)
-
-        # Iterate through the sentence
-        for feat in feats:
-            alphas_t = []  # The forward variables at this timestep
-            for next_tag in range(self.tagset_size):
-                # broadcast the emission score: it is the same regardless of
-                # the previous tag
-                emit_score = feat[next_tag].reshape((1, -1))
-                # the ith entry of trans_score is the score of transitioning to
-                # next_tag from i
-                trans_score = self.transitions.data()[next_tag].reshape((1, -1))
-                # The ith entry of next_tag_var is the value for the
-                # edge (i -> next_tag) before we do log-sum-exp
-                next_tag_var = alphas + trans_score + emit_score
-                # The forward variable for this tag is log-sum-exp of all the
-                # scores.
-                alphas_t.append(log_sum_exp(next_tag_var))
-            alphas = nd.concat(*alphas_t, dim=0).reshape((1, -1))
-        terminal_var = alphas + self.transitions.data()[self.tag2idx[STOP_TAG]]
-        alpha = log_sum_exp(terminal_var)
-        return alpha
-
-    def _get_lstm_features(self, sentences):
-        self.hidden = self.init_hidden()
-        length = sentences.shape[0]
-        embeds = self.word_embeds(sentences).reshape((length, 1, -1))
-        lstm_out, self.hidden = self.lstm(embeds, self.hidden)
-        lstm_out = lstm_out.reshape((length, self.hidden_dim))
-        lstm_feats = self.hidden2tag(lstm_out)
-        return nd.split(lstm_feats, num_outputs=length, axis=0, squeeze_axis=True)
-
-    def _score_sentence(self, feats, tags_array):
-        # Gives the score of a provided tag sequence
-        score = nd.array([0])
-        tags_array = nd.concat(nd.array([self.tag2idx[START_TAG]]), *tags_array, dim=0)
-        for idx, feat in enumerate(feats):
-            score = score + \
-                    self.transitions.data()[to_scalar(tags_array[idx+1]),
-                                            to_scalar(tags_array[idx])] + feat[to_scalar(tags_array[idx+1])]
-        score = score + self.transitions.data()[self.tag2idx[STOP_TAG],
-                                                to_scalar(tags_array[int(tags_array.shape[0]-1)])]
-        return score
-
-    def _viterbi_decode(self, feats):
-        backpointers = []
-
-        # Initialize the viterbi variables in log space
-        vvars = nd.full((1, self.tagset_size), -10000.)
-        vvars[0, self.tag2idx[START_TAG]] = 0
-
-        for feat in feats:
-            bptrs_t = []  # holds the backpointers for this step
-            viterbivars_t = []  # holds the viterbi variables for this step
-
-            for next_tag in range(self.tagset_size):
-                # next_tag_var[i] holds the viterbi variable for tag i at the
-                # previous step, plus the score of transitioning
-                # from tag i to next_tag.
-                # We don't include the emission scores here because the max
-                # does not depend on them (we add them in below)
-                next_tag_var = vvars + self.transitions.data()[next_tag]
-                best_tag_id = argmax(next_tag_var)
-                bptrs_t.append(best_tag_id)
-                viterbivars_t.append(next_tag_var[0, best_tag_id])
-            # Now add in the emission scores, and assign vvars to the set
-            # of viterbi variables we just computed
-            vvars = (nd.concat(*viterbivars_t, dim=0) + feat).reshape((1, -1))
-            backpointers.append(bptrs_t)
-
-        # Transition to STOP_TAG
-        terminal_var = vvars + self.transitions.data()[self.tag2idx[STOP_TAG]]
-        best_tag_id = argmax(terminal_var)
-        path_score = terminal_var[0, best_tag_id]
-
-        # Follow the back pointers to decode the best path.
-        best_path = [best_tag_id]
-        for bptrs_t in reversed(backpointers):
-            best_tag_id = bptrs_t[best_tag_id]
-            best_path.append(best_tag_id)
-        # Pop off the start tag (we don't want to return that to the caller)
-        start = best_path.pop()
-        assert start == self.tag2idx[START_TAG]  # Sanity check
-        best_path.reverse()
-        return path_score, best_path
-
-    def neg_log_likelihood(self, sentences, tags_list):
-        feats = self._get_lstm_features(sentences)
-        forward_score = self._forward_alg(feats)
-        gold_score = self._score_sentence(feats, tags_list)
-        return forward_score - gold_score
-
-    def forward(self, sentences):  # don't confuse this with _forward_alg above.
-        # Get the emission scores from the BiLSTM
-        lstm_feats = self._get_lstm_features(sentences)
-
-        # Find the best path, given the features.
-        score, tag_seq = self._viterbi_decode(lstm_feats)
-        return score, tag_seq
-
-
-# Run training
-START_TAG = "<START>"
-STOP_TAG = "<STOP>"
-EMBEDDING_DIM = 5
-HIDDEN_DIM = 4
-
-# Make up some training data
-training_data = [(
-    "the wall street journal reported today that apple corporation made money".split(),
-    "B I I I O O O B I O O".split()
-), (
-    "georgia tech is a university in georgia".split(),
-    "B I O O O O B".split()
-)]
-
-word2idx = {}
-for sentence, tags in training_data:
-    for word in sentence:
-        if word not in word2idx:
-            word2idx[word] = len(word2idx)
-
-tag2idx = {"B": 0, "I": 1, "O": 2, START_TAG: 3, STOP_TAG: 4}
-
-model = BiLSTM_CRF(len(word2idx), tag2idx, EMBEDDING_DIM, HIDDEN_DIM)
-model.initialize(mx.init.Xavier(magnitude=2.24), ctx=mx.cpu())
-optimizer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.01, 'wd': 1e-4})
-
-# Check predictions before training
-precheck_sent = prepare_sequence(training_data[0][0], word2idx)
-precheck_tags = nd.array([tag2idx[t] for t in training_data[0][1]])
-print(model(precheck_sent))
-
-# Make sure prepare_sequence from earlier in the LSTM section is loaded
-for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
-
-    neg_log_likelihood_acc = 0.
-    iter = 0
-    for i, (sentence, tags) in enumerate(training_data):
-        # Step 1. Get our inputs ready for the network, that is,
-        # turn them into Variables of word indices.
-        # Remember to use autograd to record the calculation.
-        with ag.record():
-            sentence_in = prepare_sequence(sentence, word2idx)
-            targets = nd.array([tag2idx[t] for t in tags])
-
-            # Step 2. Run our forward pass.
-            neg_log_likelihood = model.neg_log_likelihood(sentence_in, targets)
-
-            # Step 3. Compute the loss, gradients, and update the parameters by
-            # calling optimizer.step()
-            neg_log_likelihood.backward()
-        optimizer.step(1)
-        neg_log_likelihood_acc += neg_log_likelihood.mean()
-        iter = i
-    print("Epoch [{}], Negative Log Likelihood {:.4f}".format(epoch, neg_log_likelihood_acc.asscalar()/(iter+1)))
-
-# Check predictions after training
-precheck_sent = prepare_sequence(training_data[0][0], word2idx)
-print(model(precheck_sent))
-
-# Acknowledgement: this example is adapted from the PyTorch NLP tutorials.
diff --git a/example/gluon/mnist/mnist.py b/example/gluon/mnist/mnist.py
index 8066379..121fcdf 100644
--- a/example/gluon/mnist/mnist.py
+++ b/example/gluon/mnist/mnist.py
@@ -71,8 +71,8 @@ val_data = gluon.data.DataLoader(
 def test(ctx):
     metric = mx.gluon.metric.Accuracy()
     for data, label in val_data:
-        data = data.as_in_context(ctx)
-        label = label.as_in_context(ctx)
+        data = data.as_in_ctx(ctx)
+        label = label.as_in_ctx(ctx)
         output = net(data)
         metric.update([label], [output])
 
@@ -93,8 +93,8 @@ def train(epochs, ctx):
         metric.reset()
         for i, (data, label) in enumerate(train_data):
             # Copy data to ctx if necessary
-            data = data.as_in_context(ctx)
-            label = label.as_in_context(ctx)
+            data = data.as_in_ctx(ctx)
+            label = label.as_in_ctx(ctx)
             # Start recording computation graph with record() section.
             # Recorded graphs can then be differentiated with backward.
             with autograd.record():
diff --git a/example/gluon/sn_gan/README.md b/example/gluon/sn_gan/README.md
deleted file mode 100644
index 054416f..0000000
--- a/example/gluon/sn_gan/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Spectral Normalization GAN
-
-This example implements [Spectral Normalization for Generative Adversarial Networks](https://arxiv.org/abs/1802.05957) based on [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset.
-
-## Usage
-
-Example runs and the results:
-
-```bash
-python train.py --use-gpu --data-path=data
-```
-
-* Note that the program will download the CIFAR10 dataset for you
-
-`python train.py --help` gives the following arguments:
-
-```bash
-optional arguments:
-  -h, --help            show this help message and exit
-  --data-path DATA_PATH
-                        path of data.
-  --batch-size BATCH_SIZE
-                        training batch size. default is 64.
-  --epochs EPOCHS       number of training epochs. default is 100.
-  --lr LR               learning rate. default is 0.0001.
-  --lr-beta LR_BETA     learning rate for the beta in margin based loss.
-                        default is 0.5.
-  --use-gpu             use gpu for training.
-  --clip_gr CLIP_GR     Clip the gradient by projecting onto the box. default
-                        is 10.0.
-  --z-dim Z_DIM         dimension of the latent z vector. default is 100.
-```
-
-## Result
-
-![SN-GAN](sn_gan_output.png)
-
-## Learned Spectral Normalization
-
-![alt text](https://github.com/taki0112/Spectral_Normalization-Tensorflow/blob/master/assests/sn.png)
-
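-Conceptually, spectral normalization divides each weight matrix by an estimate of its largest singular value so that every layer is roughly 1-Lipschitz. `model.py` (below) estimates this value with power iteration inside `SNConv2D._spectral_norm`, keeping a persistent `u` vector across updates; the following NumPy sketch (illustration only, on a random matrix) shows the same computation:
-
-```python
-import numpy as np
-
-w = np.random.randn(64, 3 * 4 * 4)    # conv weight flattened to (out_channels, -1)
-u = np.random.randn(1, 64)            # left singular vector estimate, reused across steps
-
-for _ in range(1):                     # POWER_ITERATION steps
-    v = u @ w
-    v /= np.linalg.norm(v)
-    u = v @ w.T
-    u /= np.linalg.norm(u)
-
-sigma = (u @ w @ v.T).item()           # approximate largest singular value
-w_sn = w / sigma                       # spectrally normalized weight
-print(sigma, np.linalg.svd(w, compute_uv=False)[0])  # estimate vs. exact value
-```
-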
-## Reference
-
-[Simple Tensorflow Implementation](https://github.com/taki0112/Spectral_Normalization-Tensorflow)
\ No newline at end of file
diff --git a/example/gluon/sn_gan/data.py b/example/gluon/sn_gan/data.py
deleted file mode 100644
index 754aa2c..0000000
--- a/example/gluon/sn_gan/data.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This example is inspired by https://github.com/jason71995/Keras-GAN-Library,
-# https://github.com/kazizzad/DCGAN-Gluon-MxNet/blob/master/MxnetDCGAN.ipynb
-# https://github.com/apache/incubator-mxnet/blob/master/example/gluon/dc_gan/dcgan.py
-
-import numpy as np
-
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon.data.vision import CIFAR10
-
-IMAGE_SIZE = 64
-
-def transformer(data, label):
-    """ data preparation """
-    data = mx.image.imresize(data, IMAGE_SIZE, IMAGE_SIZE)
-    data = mx.nd.transpose(data, (2, 0, 1))
-    data = data.astype(np.float32) / 128.0 - 1
-    return data, label
-
-
-def get_training_data(batch_size):
-    """ helper function to get dataloader"""
-    return gluon.data.DataLoader(
-        CIFAR10(train=True).transform(transformer),
-        batch_size=batch_size, shuffle=True, last_batch='discard')
diff --git a/example/gluon/sn_gan/model.py b/example/gluon/sn_gan/model.py
deleted file mode 100644
index cfd7f93..0000000
--- a/example/gluon/sn_gan/model.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This example is inspired by https://github.com/jason71995/Keras-GAN-Library,
-# https://github.com/kazizzad/DCGAN-Gluon-MxNet/blob/master/MxnetDCGAN.ipynb
-# https://github.com/apache/incubator-mxnet/blob/master/example/gluon/dc_gan/dcgan.py
-
-import mxnet as mx
-from mxnet import nd
-from mxnet import gluon, autograd
-from mxnet.gluon import Block
-
-
-EPSILON = 1e-08
-POWER_ITERATION = 1
-
-class SNConv2D(Block):
-    """ Customized Conv2D to feed the conv with the weight that we apply spectral normalization """
-
-    def __init__(self, num_filter, kernel_size,
-                 strides, padding, in_channels,
-                 ctx=mx.cpu(), iterations=1):
-
-        super(SNConv2D, self).__init__()
-
-        self.num_filter = num_filter
-        self.kernel_size = kernel_size
-        self.strides = strides
-        self.padding = padding
-        self.in_channels = in_channels
-        self.iterations = iterations
-        self.ctx = ctx
-
-        with self.name_scope():
-            # init the weight
-            self.weight = self.params.get('weight', shape=(
-                num_filter, in_channels, kernel_size, kernel_size))
-            self.u = self.params.get(
-                'u', init=mx.init.Normal(), shape=(1, num_filter))
-
-    def _spectral_norm(self):
-        """ spectral normalization """
-        w = self.params.get('weight').data(self.ctx)
-        w_mat = nd.reshape(w, [w.shape[0], -1])
-
-        _u = self.u.data(self.ctx)
-        _v = None
-
-        for _ in range(POWER_ITERATION):
-            _v = nd.L2Normalization(nd.dot(_u, w_mat))
-            _u = nd.L2Normalization(nd.dot(_v, w_mat.T))
-
-        sigma = nd.sum(nd.dot(_u, w_mat) * _v)
-        if sigma == 0.:
-            sigma = EPSILON
-
-        with autograd.pause():
-            self.u.set_data(_u)
-
-        return w / sigma
-
-    def forward(self, x):
-        # x shape is batch_size x in_channels x height x width
-        return nd.Convolution(
-            data=x,
-            weight=self._spectral_norm(),
-            kernel=(self.kernel_size, self.kernel_size),
-            pad=(self.padding, self.padding),
-            stride=(self.strides, self.strides),
-            num_filter=self.num_filter,
-            no_bias=True
-        )
-
-
-def get_generator():
-    """ construct and return generator """
-    g_net = gluon.nn.Sequential()
-    with g_net.name_scope():
-
-        g_net.add(gluon.nn.Conv2DTranspose(
-            channels=512, kernel_size=4, strides=1, padding=0, use_bias=False))
-        g_net.add(gluon.nn.BatchNorm())
-        g_net.add(gluon.nn.LeakyReLU(0.2))
-
-        g_net.add(gluon.nn.Conv2DTranspose(
-            channels=256, kernel_size=4, strides=2, padding=1, use_bias=False))
-        g_net.add(gluon.nn.BatchNorm())
-        g_net.add(gluon.nn.LeakyReLU(0.2))
-
-        g_net.add(gluon.nn.Conv2DTranspose(
-            channels=128, kernel_size=4, strides=2, padding=1, use_bias=False))
-        g_net.add(gluon.nn.BatchNorm())
-        g_net.add(gluon.nn.LeakyReLU(0.2))
-
-        g_net.add(gluon.nn.Conv2DTranspose(
-            channels=64, kernel_size=4, strides=2, padding=1, use_bias=False))
-        g_net.add(gluon.nn.BatchNorm())
-        g_net.add(gluon.nn.LeakyReLU(0.2))
-
-        g_net.add(gluon.nn.Conv2DTranspose(channels=3, kernel_size=4, strides=2, padding=1, use_bias=False))
-        g_net.add(gluon.nn.Activation('tanh'))
-
-    return g_net
-
-
-def get_descriptor(ctx):
-    """ construct and return descriptor """
-    d_net = gluon.nn.Sequential()
-    with d_net.name_scope():
-
-        d_net.add(SNConv2D(num_filter=64, kernel_size=4, strides=2, padding=1, in_channels=3, ctx=ctx))
-        d_net.add(gluon.nn.LeakyReLU(0.2))
-
-        d_net.add(SNConv2D(num_filter=128, kernel_size=4, strides=2, padding=1, in_channels=64, ctx=ctx))
-        d_net.add(gluon.nn.LeakyReLU(0.2))
-
-        d_net.add(SNConv2D(num_filter=256, kernel_size=4, strides=2, padding=1, in_channels=128, ctx=ctx))
-        d_net.add(gluon.nn.LeakyReLU(0.2))
-
-        d_net.add(SNConv2D(num_filter=512, kernel_size=4, strides=2, padding=1, in_channels=256, ctx=ctx))
-        d_net.add(gluon.nn.LeakyReLU(0.2))
-
-        d_net.add(SNConv2D(num_filter=1, kernel_size=4, strides=1, padding=0, in_channels=512, ctx=ctx))
-
-    return d_net
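-
-
-# A minimal smoke test of the two networks (illustration only; train.py does the real setup):
-#
-#   ctx = mx.cpu()
-#   g_net, d_net = get_generator(), get_descriptor(ctx)
-#   g_net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
-#   d_net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
-#   z = nd.normal(loc=0, scale=1, shape=(4, 100, 1, 1), ctx=ctx)
-#   fake = g_net(z)        # (4, 3, 64, 64) images in [-1, 1] from the final tanh
-#   score = d_net(fake)    # (4, 1, 1, 1) raw (un-sigmoided) discriminator scores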
diff --git a/example/gluon/sn_gan/sn_gan_output.png b/example/gluon/sn_gan/sn_gan_output.png
deleted file mode 100644
index 428c333..0000000
Binary files a/example/gluon/sn_gan/sn_gan_output.png and /dev/null differ
diff --git a/example/gluon/sn_gan/train.py b/example/gluon/sn_gan/train.py
deleted file mode 100644
index fc4e87d..0000000
--- a/example/gluon/sn_gan/train.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This example is inspired by https://github.com/jason71995/Keras-GAN-Library,
-# https://github.com/kazizzad/DCGAN-Gluon-MxNet/blob/master/MxnetDCGAN.ipynb
-# https://github.com/apache/incubator-mxnet/blob/master/example/gluon/dc_gan/dcgan.py
-
-
-import os
-import random
-import logging
-import argparse
-
-from data import get_training_data
-from model import get_generator, get_descriptor
-from utils import save_image
-
-import mxnet as mx
-from mxnet import nd, autograd
-from mxnet import gluon
-
-# CLI
-parser = argparse.ArgumentParser(
-    description='train a model for Spectral Normalization GAN.')
-parser.add_argument('--data-path', type=str, default='./data',
-                    help='path of data.')
-parser.add_argument('--batch-size', type=int, default=64,
-                    help='training batch size. default is 64.')
-parser.add_argument('--epochs', type=int, default=100,
-                    help='number of training epochs. default is 100.')
-parser.add_argument('--lr', type=float, default=0.0001,
-                    help='learning rate. default is 0.0001.')
-parser.add_argument('--lr-beta', type=float, default=0.5,
-                    help='learning rate for the beta in margin based loss. default is 0.5.')
-parser.add_argument('--use-gpu', action='store_true',
-                    help='use gpu for training.')
-parser.add_argument('--clip_gr', type=float, default=10.0,
-                    help='Clip the gradient by projecting onto the box. default is 10.0.')
-parser.add_argument('--z-dim', type=int, default=100,
-                    help='dimension of the latent z vector. default is 100.')
-opt = parser.parse_args()
-
-BATCH_SIZE = opt.batch_size
-Z_DIM = opt.z_dim
-NUM_EPOCHS = opt.epochs
-LEARNING_RATE = opt.lr
-BETA = opt.lr_beta
-OUTPUT_DIR = opt.data_path
-CTX = mx.gpu() if opt.use_gpu else mx.cpu()
-CLIP_GRADIENT = opt.clip_gr
-IMAGE_SIZE = 64
-
-
-def facc(label, pred):
-    """ evaluate accuracy """
-    pred = pred.ravel()
-    label = label.ravel()
-    return ((pred > 0.5) == label).mean()
-
-
-# setting
-mx.random.seed(random.randint(1, 10000))
-logging.basicConfig(level=logging.DEBUG)
-
-# create output dir
-try:
-    os.makedirs(opt.data_path)
-except OSError:
-    pass
-
-# get training data
-train_data = get_training_data(opt.batch_size)
-
-# get model
-g_net = get_generator()
-d_net = get_descriptor(CTX)
-
-# define loss function
-loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
-
-# initialization
-g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
-d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
-g_trainer = gluon.Trainer(
-    g_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
-d_trainer = gluon.Trainer(
-    d_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
-g_net.collect_params().zero_grad()
-d_net.collect_params().zero_grad()
-# define evaluation metric
-metric = mx.gluon.metric.CustomMetric(facc)
-# initialize labels
-real_label = nd.ones(BATCH_SIZE, CTX)
-fake_label = nd.zeros(BATCH_SIZE, CTX)
-
-for epoch in range(NUM_EPOCHS):
-    for i, (d, _) in enumerate(train_data):
-        # update D
-        data = d.as_in_context(CTX)
-        noise = nd.normal(loc=0, scale=1, shape=(
-            BATCH_SIZE, Z_DIM, 1, 1), ctx=CTX)
-        with autograd.record():
-            # train with real image
-            output = d_net(data).reshape((-1, 1))
-            errD_real = loss(output, real_label)
-            metric.update([real_label, ], [output, ])
-
-            # train with fake image
-            fake_image = g_net(noise)
-            output = d_net(fake_image.detach()).reshape((-1, 1))
-            errD_fake = loss(output, fake_label)
-            errD = errD_real + errD_fake
-            errD.backward()
-            metric.update([fake_label, ], [output, ])
-
-        d_trainer.step(BATCH_SIZE)
-        # update G
-        with autograd.record():
-            fake_image = g_net(noise)
-            output = d_net(fake_image).reshape(-1, 1)
-            errG = loss(output, real_label)
-            errG.backward()
-
-        g_trainer.step(BATCH_SIZE)
-
-        # print log information every 100 batches
-        if i % 100 == 0:
-            name, acc = metric.get()
-            logging.info('discriminator loss = %f, generator loss = %f, \
-                          binary training acc = %f at iter %d epoch %d',
-                         nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, i, epoch)
-        if i == 0:
-            save_image(fake_image, epoch, IMAGE_SIZE, BATCH_SIZE, OUTPUT_DIR)
-
-    metric.reset()
diff --git a/example/gluon/sn_gan/utils.py b/example/gluon/sn_gan/utils.py
deleted file mode 100644
index 1a77a6e..0000000
--- a/example/gluon/sn_gan/utils.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This example is inspired by https://github.com/jason71995/Keras-GAN-Library,
-# https://github.com/kazizzad/DCGAN-Gluon-MxNet/blob/master/MxnetDCGAN.ipynb
-# https://github.com/apache/incubator-mxnet/blob/master/example/gluon/dc_gan/dcgan.py
-
-import math
-
-import numpy as np
-import imageio
-
-def save_image(data, epoch, image_size, batch_size, output_dir, padding=2):
-    """ save image """
-    data = data.asnumpy().transpose((0, 2, 3, 1))
-    datanp = np.clip(
-        (data - np.min(data))*(255.0/(np.max(data) - np.min(data))), 0, 255).astype(np.uint8)
-    x_dim = min(8, batch_size)
-    y_dim = int(math.ceil(float(batch_size) / x_dim))
-    height, width = int(image_size + padding), int(image_size + padding)
-    grid = np.zeros((height * y_dim + 1 + padding // 2, width *
-                     x_dim + 1 + padding // 2, 3), dtype=np.uint8)
-    k = 0
-    for y in range(y_dim):
-        for x in range(x_dim):
-            if k >= batch_size:
-                break
-            start_y = y * height + 1 + padding // 2
-            end_y = start_y + height - padding
-            start_x = x * width + 1 + padding // 2
-            end_x = start_x + width - padding
-            np.copyto(grid[start_y:end_y, start_x:end_x, :], datanp[k])
-            k += 1
-    imageio.imwrite(
-        '{}/fake_samples_epoch_{}.png'.format(output_dir, epoch), grid)
diff --git a/example/gluon/style_transfer/README.md b/example/gluon/style_transfer/README.md
deleted file mode 100644
index 1d4ef43..0000000
--- a/example/gluon/style_transfer/README.md
+++ /dev/null
@@ -1,134 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# MXNet-Gluon-Style-Transfer
-
-This repo provides an MXNet implementation of **[Neural Style Transfer](#neural-style)** and **[MSG-Net](#real-time-style-transfer)**.
-
-**Table of contents**
-
-* [Slow Neural Style Transfer](#neural-style)
-* [Real-time Style Transfer](#real-time-style-transfer)
-	- [Stylize Images using Pre-trained MSG-Net](#stylize-images-using-pre-trained-msg-net)
-	- [Train Your Own MSG-Net Model](#train-your-own-msg-net-model)
-
-## Neural Style
-
-[A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576) by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge.
-
-
-**Download the images**
-
-```bash
-python download_images.py 
-```
-
-**Neural style transfer**
-
-```bash
-python main.py optim --content-image images/content/venice-boat.jpg --style-image images/styles/candy.jpg
-```
-* `--content-image`: path to content image.
-* `--style-image`: path to style image.
-* `--output-image`: path for saving the output image.
-* `--content-size`: the content image size to test on.
-* `--style-size`: the style image size to test on.
-* `--cuda`: set it to 1 for running on GPU, 0 for CPU.
-
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g1.jpg" width="260px" /> <img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g2.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g3.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g4.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g5.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g6.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g7.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g8.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/g9.jpg" width="260px" />
-
-## Real-time Style Transfer
-<table width="100%" border="0" cellspacing="15" cellpadding="0">
-	<tbody>
-		<tr>
-			<td>
-			<b>Multi-style Generative Network for Real-time Transfer</b>  [<a href="https://arxiv.org/pdf/1703.06953.pdf">arXiv</a>] [<a href="http://computervisionrutgers.github.io/MSG-Net/">project</a>]  <br>
-  <a href="http://hangzh.com/">Hang Zhang</a>,  <a href="http://eceweb1.rutgers.edu/vision/dana.html">Kristin Dana</a>
-<pre>
-@article{zhang2017multistyle,
-	title={Multi-style Generative Network for Real-time Transfer},
-	author={Zhang, Hang and Dana, Kristin},
-	journal={arXiv preprint arXiv:1703.06953},
-	year={2017}
-}
-</pre>
-			</td>
-			<td width="440"><a><img src ="https://raw.githubusercontent.com/zhanghang1989/MSG-Net/master/images/figure1.jpg" width="420px" border="1"></a></td>
-		</tr>
-	</tbody>
-</table>
-
-
-### Stylize Images Using Pre-trained MSG-Net
-0. Download the images and pre-trained model
-	```bash
-    python download_images.py 
-	python models/download_model.py
-	```
-0. Test the model
-	```bash
-	python main.py eval --content-image images/content/venice-boat.jpg --style-image images/styles/candy.jpg --model models/21styles.params --content-size 1024
-	```
-* If you don't have a GPU, simply set `--cuda=0`. For a different style, set `--style-image path/to/style`.
-	If you would like to stylize your own photo, change `--content-image path/to/your/photo`.
-	More options:
-
-	* `--content-image`: path to content image you want to stylize.
-	* `--style-image`: path to style image (typically covered during the training).
-	* `--model`: path to the pre-trained model to be used for stylizing the image.
-	* `--output-image`: path for saving the output image.
-	* `--content-size`: the content image size to test on.
-	* `--cuda`: set it to 1 for running on GPU, 0 for CPU.
-
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/1.jpg" width="260px" /> <img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/2.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/3.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/4.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/5.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/6.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/7.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/8.jpg" width="260px" />
-<img src ="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/style_transfer/images/9.jpg" width="260px" />
-
-### Train Your Own MSG-Net Model
-0. Download the style images and COCO dataset
-Note: Dataset from [COCO 2014](http://cocodataset.org/#download).
-The dataset annotations and site are Copyright COCO Consortium and licensed CC BY 4.0 Attribution.
-The images within the dataset are available under the Flickr Terms of Use.
-See original [dataset source](http://cocodataset.org/#termsofuse) for details
-	```bash
-    python download_images.py 
-	python dataset/download_dataset.py
-	```
-0. Train the model
-	```bash
-	python main.py train --epochs 4
-	```
-* If you would like to customize styles, set `--style-folder path/to/your/styles`. More options:
-	* `--style-folder`: path to the folder of style images.
-	* `--vgg-model-dir`: path to folder where the vgg model will be downloaded.
-	* `--save-model-dir`: path to folder where trained model will be saved.
-	* `--cuda`: set it to 1 for running on GPU, 0 for CPU.
-
-
-The code is mainly modified from [PyTorch-Style-Transfer](https://github.com/zhanghang1989/PyTorch-Style-Transfer).
diff --git a/example/gluon/style_transfer/data.py b/example/gluon/style_transfer/data.py
deleted file mode 100644
index d2b4ab6..0000000
--- a/example/gluon/style_transfer/data.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import mxnet.gluon.data as data
-
-from PIL import Image
-import os
-import os.path
-
-IMG_EXTENSIONS = [
-    '.jpg', '.JPG', '.jpeg', '.JPEG',
-    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
-]
-
-
-def is_image_file(filename):
-    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
-
-
-def find_classes(dir):
-    classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
-    classes.sort()
-    class_to_idx = {classes[i]: i for i in range(len(classes))}
-    return classes, class_to_idx
-
-
-def make_dataset(dir, class_to_idx):
-    images = []
-    dir = os.path.expanduser(dir)
-    for target in sorted(os.listdir(dir)):
-        d = os.path.join(dir, target)
-        if not os.path.isdir(d):
-            continue
-
-        for root, _, fnames in sorted(os.walk(d)):
-            for fname in sorted(fnames):
-                if is_image_file(fname):
-                    path = os.path.join(root, fname)
-                    item = (path, class_to_idx[target])
-                    images.append(item)
-
-    return images
-
-
-def pil_loader(path):
-    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
-    with open(path, 'rb') as f:
-        with Image.open(f) as img:
-            return img.convert('RGB')
-
-
-class ImageFolder(data.Dataset):
-    """A generic data loader where the images are arranged in this way: ::
-
-        root/dog/xxx.png
-        root/dog/xxy.png
-        root/dog/xxz.png
-
-        root/cat/123.png
-        root/cat/nsdf3.png
-        root/cat/asd932_.png
-
-    Args:
-        root (string): Root directory path.
-        transform (callable, optional): A function/transform that takes in a PIL image
-            and returns a transformed version. E.g., ``transforms.RandomCrop``
-        target_transform (callable, optional): A function/transform that takes in the
-            target and transforms it.
-        loader (callable, optional): A function to load an image given its path.
-
-     Attributes:
-        classes (list): List of the class names.
-        class_to_idx (dict): Dict with items (class_name, class_index).
-        imgs (list): List of (image path, class_index) tuples
-    """
-
-    def __init__(self, root, transform=None, target_transform=None,
-                 loader=pil_loader):
-        classes, class_to_idx = find_classes(root)
-        imgs = make_dataset(root, class_to_idx)
-        if len(imgs) == 0:
-            raise(RuntimeError("Found 0 images in subfolders of: " + root + "\n"
-                               "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)))
-
-        self.root = root
-        self.imgs = imgs
-        self.classes = classes
-        self.class_to_idx = class_to_idx
-        self.transform = transform
-        self.target_transform = target_transform
-        self.loader = loader
-
-    def __getitem__(self, index):
-        """
-        Args:
-            index (int): Index
-
-        Returns:
-            tuple: (image, target) where target is class_index of the target class.
-        """
-        path, target = self.imgs[index]
-        img = self.loader(path)
-        if self.transform is not None:
-            img = self.transform(img)
-        if self.target_transform is not None:
-            target = self.target_transform(target)
-
-        return img, target
-
-    def __len__(self):
-        return len(self.imgs)
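The custom `ImageFolder` above duplicates functionality that Gluon ships out of the box. As an illustration only (not a drop-in replacement, since the built-in dataset yields NDArrays rather than PIL images), a minimal sketch assuming an image tree laid out as `root/class_x/xxx.png` under a hypothetical `dataset/` root:

```python
import mxnet as mx
from mxnet.gluon.data.vision import ImageFolderDataset, transforms

# 'dataset/' is a hypothetical root containing one sub-folder per class.
dataset = ImageFolderDataset('dataset/')
transform = transforms.Compose([
    transforms.Resize(256),        # roughly what utils.Scale did
    transforms.CenterCrop(256),
    transforms.ToTensor(),
])
loader = mx.gluon.data.DataLoader(dataset.transform_first(transform),
                                  batch_size=4, last_batch='discard')
```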
diff --git a/example/gluon/style_transfer/dataset/download_dataset.py b/example/gluon/style_transfer/dataset/download_dataset.py
deleted file mode 100644
index 6d32d94..0000000
--- a/example/gluon/style_transfer/dataset/download_dataset.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os, zipfile
-import mxnet
-from mxnet.test_utils import download
-
-def unzip_file(filename, outpath):
-    fh = open(filename, 'rb')
-    z = zipfile.ZipFile(fh)
-    for name in z.namelist():
-        z.extract(name, outpath)
-    fh.close()
-
-# Dataset from COCO 2014: http://cocodataset.org/#download
-# The dataset annotations and site are Copyright COCO Consortium and licensed CC BY 4.0 Attribution.
-# The images within the dataset are available under the Flickr Terms of Use.
-# See http://cocodataset.org/#termsofuse for details
-download('http://msvocds.blob.core.windows.net/coco2014/train2014.zip', 'dataset/train2014.zip')
-download('http://msvocds.blob.core.windows.net/coco2014/val2014.zip', 'dataset/val2014.zip')
-
-unzip_file('dataset/train2014.zip', 'dataset')
-unzip_file('dataset/val2014.zip', 'dataset')
diff --git a/example/gluon/style_transfer/download_images.py b/example/gluon/style_transfer/download_images.py
deleted file mode 100644
index 9f7b300..0000000
--- a/example/gluon/style_transfer/download_images.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-if not os.path.exists('images'):
-        os.system('svn checkout https://github.com/dmlc/web-data/trunk/mxnet/example/style_transfer/images')
diff --git a/example/gluon/style_transfer/main.py b/example/gluon/style_transfer/main.py
deleted file mode 100644
index 816487a..0000000
--- a/example/gluon/style_transfer/main.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import time
-import random
-import os
-import mxnet as mx
-import numpy as np
-np.set_printoptions(precision=2)
-from PIL import Image
-
-from mxnet import autograd, gluon
-from mxnet.gluon import nn, Block, HybridBlock, Parameter
-import mxnet.ndarray as F
-
-import net
-import utils
-from option import Options
-import data
-
-def train(args):
-    np.random.seed(args.seed)
-    if args.cuda:
-        ctx = mx.gpu(0)
-    else:
-        ctx = mx.cpu(0)
-    # dataloader
-    transform = utils.Compose([utils.Scale(args.image_size),
-                               utils.CenterCrop(args.image_size),
-                               utils.ToTensor(ctx),
-                               ])
-    train_dataset = data.ImageFolder(args.dataset, transform)
-    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size,
-                                         last_batch='discard')
-    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
-    print('len(style_loader):',style_loader.size())
-    # models
-    vgg = net.Vgg16()
-    utils.init_vgg_params(vgg, 'models', ctx=ctx)
-    style_model = net.Net(ngf=args.ngf)
-    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
-    if args.resume is not None:
-        print('Resuming, initializing using weight from {}.'.format(args.resume))
-        style_model.load_parameters(args.resume, ctx=ctx)
-    print('style_model:',style_model)
-    # optimizer and loss
-    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
-                            {'learning_rate': args.lr})
-    mse_loss = gluon.loss.L2Loss()
-
-    for e in range(args.epochs):
-        agg_content_loss = 0.
-        agg_style_loss = 0.
-        count = 0
-        for batch_id, (x, _) in enumerate(train_loader):
-            n_batch = len(x)
-            count += n_batch
-            # prepare data
-            style_image = style_loader.get(batch_id)
-            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
-            style_image = utils.preprocess_batch(style_image)
-
-            features_style = vgg(style_v)
-            gram_style = [net.gram_matrix(y) for y in features_style]
-
-            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
-            f_xc_c = vgg(xc)[1]
-            with autograd.record():
-                style_model.set_target(style_image)
-                y = style_model(x)
-
-                y = utils.subtract_imagenet_mean_batch(y)
-                features_y = vgg(y)
-
-                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)
-
-                style_loss = 0.
-                for m in range(len(features_y)):
-                    gram_y = net.gram_matrix(features_y[m])
-                    _, C, _ = gram_style[m].shape
-                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
-                    style_loss = style_loss + 2 * args.style_weight * \
-                        mse_loss(gram_y, gram_s[:n_batch, :, :])
-
-                total_loss = content_loss + style_loss
-                total_loss.backward()
-
-            trainer.step(args.batch_size)
-            mx.nd.waitall()
-
-            agg_content_loss += content_loss[0]
-            agg_style_loss += style_loss[0]
-
-            if (batch_id + 1) % args.log_interval == 0:
-                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
-                    time.ctime(), e + 1, count, len(train_dataset),
-                                agg_content_loss.asnumpy()[0] / (batch_id + 1),
-                                agg_style_loss.asnumpy()[0] / (batch_id + 1),
-                                (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
-                )
-                print(mesg)
-
-
-            if (batch_id + 1) % (4 * args.log_interval) == 0:
-                # save model
-                save_model_filename = "Epoch_" + str(e) + "iters_" + \
-                    str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
-                    args.content_weight) + "_" + str(args.style_weight) + ".params"
-                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
-                style_model.save_parameters(save_model_path)
-                print("\nCheckpoint, trained model saved at", save_model_path)
-
-    # save model
-    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
-        args.content_weight) + "_" + str(args.style_weight) + ".params"
-    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
-    style_model.save_parameters(save_model_path)
-    print("\nDone, trained model saved at", save_model_path)
-
-
-def evaluate(args):
-    if args.cuda:
-        ctx = mx.gpu(0)
-    else:
-        ctx = mx.cpu(0)
-    # images
-    content_image = utils.tensor_load_rgbimage(args.content_image,ctx, size=args.content_size, keep_asp=True)
-    style_image = utils.tensor_load_rgbimage(args.style_image, ctx, size=args.style_size)
-    style_image = utils.preprocess_batch(style_image)
-    # model
-    style_model = net.Net(ngf=args.ngf)
-    style_model.load_parameters(args.model, ctx=ctx)
-    # forward
-    style_model.set_target(style_image)
-    output = style_model(content_image)
-    utils.tensor_save_bgrimage(output[0], args.output_image, args.cuda)
-
-
-def optimize(args):
-    """Gatys et al., CVPR 2016
-    ref: Image Style Transfer Using Convolutional Neural Networks
-    """
-    if args.cuda:
-        ctx = mx.gpu(0)
-    else:
-        ctx = mx.cpu(0)
-    # load the content and style target
-    content_image = utils.tensor_load_rgbimage(args.content_image,ctx, size=args.content_size, keep_asp=True)
-    content_image = utils.subtract_imagenet_mean_preprocess_batch(content_image)
-    style_image = utils.tensor_load_rgbimage(args.style_image, ctx, size=args.style_size)
-    style_image = utils.subtract_imagenet_mean_preprocess_batch(style_image)
-    # load the pre-trained vgg-16 and extract features
-    vgg = net.Vgg16()
-    utils.init_vgg_params(vgg, 'models', ctx=ctx)
-    # content feature
-    f_xc_c = vgg(content_image)[1]
-    # style feature
-    features_style = vgg(style_image)
-    gram_style = [net.gram_matrix(y) for y in features_style]
-    # output
-    output = Parameter('output', shape=content_image.shape)
-    output.initialize(ctx=ctx)
-    output.set_data(content_image)
-    # optimizer
-    trainer = gluon.Trainer([output], 'adam',
-                            {'learning_rate': args.lr})
-    mse_loss = gluon.loss.L2Loss()
-
-    # optimizing the images
-    for e in range(args.iters):
-        utils.imagenet_clamp_batch(output.data(), 0, 255)
-        # fix BN for pre-trained vgg
-        with autograd.record():
-            features_y = vgg(output.data())
-            content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)
-            style_loss = 0.
-            for m in range(len(features_y)):
-                gram_y = net.gram_matrix(features_y[m])
-                gram_s = gram_style[m]
-                style_loss = style_loss + 2 * args.style_weight * mse_loss(gram_y, gram_s)
-            total_loss = content_loss + style_loss
-            total_loss.backward()
-
-        trainer.step(1)
-        if (e + 1) % args.log_interval == 0:
-            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))
-
-    # save the image
-    output = utils.add_imagenet_mean_batch(output.data())
-    utils.tensor_save_bgrimage(output[0], args.output_image, args.cuda)
-
-
-def main():
-    # figure out the experiments type
-    args = Options().parse()
-
-    if args.subcommand is None:
-        raise ValueError("ERROR: specify the experiment type")
-
-    if args.subcommand == "train":
-        # Training the model
-        train(args)
-
-    elif args.subcommand == 'eval':
-        # Test the pre-trained model
-        evaluate(args)
-
-    elif args.subcommand == 'optim':
-        # Gatys et al. using optimization-based approach
-        optimize(args)
-
-    else:
-        raise ValueError('Unknown experiment type')
-
-
-if __name__ == "__main__":
-   main()
diff --git a/example/gluon/style_transfer/models/download_model.py b/example/gluon/style_transfer/models/download_model.py
deleted file mode 100644
index 8d0a855..0000000
--- a/example/gluon/style_transfer/models/download_model.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import zipfile
-import shutil
-from mxnet.test_utils import download
-
-zip_file_path = 'models/msgnet_21styles.zip'
-download('https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/models/msgnet_21styles-2cb88353.zip', zip_file_path)
-
-with zipfile.ZipFile(zip_file_path) as zf:
-        zf.extractall()
-
-os.remove(zip_file_path)
-
-shutil.move('msgnet_21styles-2cb88353.params', 'models/21styles.params')
diff --git a/example/gluon/style_transfer/net.py b/example/gluon/style_transfer/net.py
deleted file mode 100644
index 2ca992a..0000000
--- a/example/gluon/style_transfer/net.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import mxnet as mx
-from mxnet import autograd, gluon
-from mxnet.gluon import nn, Block, HybridBlock, Parameter
-from mxnet.base import numeric_types
-import mxnet.ndarray as F
-
-class InstanceNorm(HybridBlock):
-    def __init__(self, axis=1, momentum=0.9, epsilon=1e-5, center=True, scale=False,
-                 beta_initializer='zeros', gamma_initializer='ones',
-                 in_channels=0, **kwargs):
-        super(InstanceNorm, self).__init__(**kwargs)
-        self._kwargs = {'eps': epsilon}
-        if in_channels != 0:
-            self.in_channels = in_channels
-        self.gamma = self.params.get('gamma', grad_req='write' if scale else 'null',
-                                     shape=(in_channels,), init=gamma_initializer,
-                                     allow_deferred_init=True)
-        self.beta = self.params.get('beta', grad_req='write' if center else 'null',
-                                    shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
-
-    def hybrid_forward(self, F, x, gamma, beta):
-        return F.InstanceNorm(x, gamma, beta,
-                           name='fwd', **self._kwargs)
-
-    def __repr__(self):
-        s = '{name}({content}'
-        if hasattr(self, 'in_channels'):
-            s += ', in_channels={0}'.format(self.in_channels)
-        s += ')'
-        return s.format(name=self.__class__.__name__,
-                        content=', '.join(['='.join([k, v.__repr__()])
-                                           for k, v in self._kwargs.items()]))
-
-
-class ReflectancePadding(HybridBlock):
-    def __init__(self, pad_width=None, **kwargs):
-        super(ReflectancePadding, self).__init__(**kwargs)
-        self.pad_width = pad_width
-        
-    def forward(self, x):
-        return F.pad(x, mode='reflect', pad_width=self.pad_width)
-
-    
-class Bottleneck(Block):
-    """ Pre-activation residual block
-    Identity Mapping in Deep Residual Networks
-    ref https://arxiv.org/abs/1603.05027
-    """
-    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=InstanceNorm):
-        super(Bottleneck, self).__init__()
-        self.expansion = 4
-        self.downsample = downsample
-        if self.downsample is not None:
-            self.residual_layer = nn.Conv2D(in_channels=inplanes, 
-                                            channels=planes * self.expansion,
-                                            kernel_size=1, strides=(stride, stride))
-        self.conv_block = nn.Sequential()
-        with self.conv_block.name_scope():
-            self.conv_block.add(norm_layer(in_channels=inplanes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(nn.Conv2D(in_channels=inplanes, channels=planes, 
-                                 kernel_size=1))
-            self.conv_block.add(norm_layer(in_channels=planes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(ConvLayer(planes, planes, kernel_size=3, 
-                stride=stride))
-            self.conv_block.add(norm_layer(in_channels=planes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(nn.Conv2D(in_channels=planes, 
-                                 channels=planes * self.expansion, 
-                                 kernel_size=1))
-        
-    def forward(self, x):
-        if self.downsample is not None:
-            residual = self.residual_layer(x)
-        else:
-            residual = x
-        return residual + self.conv_block(x)
-
-
-class UpBottleneck(Block):
-    """ Up-sample residual block (from MSG-Net paper)
-    Enables passing identity all the way through the generator
-    ref https://arxiv.org/abs/1703.06953
-    """
-    def __init__(self, inplanes, planes, stride=2, norm_layer=InstanceNorm):
-        super(UpBottleneck, self).__init__()
-        self.expansion = 4
-        self.residual_layer = UpsampleConvLayer(inplanes, planes * self.expansion,
-                                                      kernel_size=1, stride=1, upsample=stride)
-        self.conv_block = nn.Sequential()
-        with self.conv_block.name_scope():
-            self.conv_block.add(norm_layer(in_channels=inplanes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(nn.Conv2D(in_channels=inplanes, channels=planes, 
-                                kernel_size=1))
-            self.conv_block.add(norm_layer(in_channels=planes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(UpsampleConvLayer(planes, planes, kernel_size=3, stride=1, upsample=stride))
-            self.conv_block.add(norm_layer(in_channels=planes))
-            self.conv_block.add(nn.Activation('relu'))
-            self.conv_block.add(nn.Conv2D(in_channels=planes, 
-                                channels=planes * self.expansion, 
-                                kernel_size=1))
-
-    def forward(self, x):
-        return  self.residual_layer(x) + self.conv_block(x)
-
-
-class ConvLayer(Block):
-    def __init__(self, in_channels, out_channels, kernel_size, stride):
-        super(ConvLayer, self).__init__()
-        padding = int(np.floor(kernel_size / 2))
-        self.pad = ReflectancePadding(pad_width=(0,0,0,0,padding,padding,padding,padding))
-        self.conv2d = nn.Conv2D(in_channels=in_channels, channels=out_channels, 
-                                kernel_size=kernel_size, strides=(stride,stride),
-                                padding=0)
-
-    def forward(self, x):
-        x = self.pad(x)
-        out = self.conv2d(x)
-        return out
-
-
-class UpsampleConvLayer(Block):
-    """UpsampleConvLayer
-    Upsamples the input and then does a convolution. This method gives better results
-    compared to ConvTranspose2d.
-    ref: http://distill.pub/2016/deconv-checkerboard/
-    """
-
-    def __init__(self, in_channels, out_channels, kernel_size, 
-            stride, upsample=None):
-        super(UpsampleConvLayer, self).__init__()
-        self.upsample = upsample
-        self.reflection_padding = int(np.floor(kernel_size / 2))
-        self.conv2d = nn.Conv2D(in_channels=in_channels, 
-                                channels=out_channels, 
-                                kernel_size=kernel_size, strides=(stride,stride),
-                                padding=self.reflection_padding)
-
-    def forward(self, x):
-        if self.upsample:
-            x = F.UpSampling(x, scale=self.upsample, sample_type='nearest')
-        out = self.conv2d(x)
-        return out
-
-
-def gram_matrix(y):
-    (b, ch, h, w) = y.shape
-    features = y.reshape((b, ch, w * h))
-    #features_t = F.SwapAxis(features,1, 2)
-    gram = F.batch_dot(features, features, transpose_b=True) / (ch * h * w)
-    return gram
-
-
-class GramMatrix(Block):
-    def forward(self, x):
-        gram = gram_matrix(x)
-        return gram
-
-class Net(Block):
-    def __init__(self, input_nc=3, output_nc=3, ngf=64, 
-                 norm_layer=InstanceNorm, n_blocks=6, gpu_ids=[]):
-        super(Net, self).__init__()
-        self.gpu_ids = gpu_ids
-        self.gram = GramMatrix()
-
-        block = Bottleneck
-        upblock = UpBottleneck
-        expansion = 4
-
-        with self.name_scope():
-            self.model1 = nn.Sequential()
-            self.ins = Inspiration(ngf*expansion)
-            self.model = nn.Sequential()
-
-            self.model1.add(ConvLayer(input_nc, 64, kernel_size=7, stride=1))
-            self.model1.add(norm_layer(in_channels=64))
-            self.model1.add(nn.Activation('relu'))
-            self.model1.add(block(64, 32, 2, 1, norm_layer))
-            self.model1.add(block(32*expansion, ngf, 2, 1, norm_layer))
-
-
-            self.model.add(self.model1)
-            self.model.add(self.ins)
-
-            for i in range(n_blocks):
-                self.model.add(block(ngf*expansion, ngf, 1, None, norm_layer))
-        
-            self.model.add(upblock(ngf*expansion, 32, 2, norm_layer))
-            self.model.add(upblock(32*expansion, 16, 2, norm_layer))
-            self.model.add(norm_layer(in_channels=16*expansion))
-            self.model.add(nn.Activation('relu'))
-            self.model.add(ConvLayer(16*expansion, output_nc, kernel_size=7, stride=1))
-
-
-    def set_target(self, Xs):
-        F = self.model1(Xs)
-        G = self.gram(F)
-        self.ins.set_target(G)
-
-    def forward(self, input):
-        return self.model(input)
-
-
-class Inspiration(Block):
-    """ Inspiration Layer (from MSG-Net paper)
-    tuning the featuremap with target Gram Matrix
-    ref https://arxiv.org/abs/1703.06953
-    """
-    def __init__(self, C, B=1):
-        super(Inspiration, self).__init__()
-        # B is equal to 1 or input mini_batch
-        self.C = C
-        self.weight = self.params.get('weight', shape=(1,C,C),
-                                      init=mx.initializer.Uniform(),
-                                      allow_deferred_init=True)
-        self.gram = F.random.uniform(shape=(B, C, C))
-
-    def set_target(self, target):
-        self.gram = target
-
-    def forward(self, X):
-        # input X is a 3D feature map
-        self.P = F.batch_dot(F.broadcast_to(self.weight.data(), shape=(self.gram.shape)), self.gram)
-        return F.batch_dot(F.SwapAxis(self.P,1,2).broadcast_to((X.shape[0], self.C, self.C)), X.reshape((0,0,X.shape[2]*X.shape[3]))).reshape(X.shape)
-
-    def __repr__(self):
-        return self.__class__.__name__ + '(' \
-            + 'N x ' + str(self.C) + ')'
-
-
-class Vgg16(Block):
-    def __init__(self):
-        super(Vgg16, self).__init__()
-        self.conv1_1 = nn.Conv2D(in_channels=3, channels=64, kernel_size=3, strides=1, padding=1)
-        self.conv1_2 = nn.Conv2D(in_channels=64, channels=64, kernel_size=3, strides=1, padding=1)
-
-        self.conv2_1 = nn.Conv2D(in_channels=64, channels=128, kernel_size=3, strides=1, padding=1)
-        self.conv2_2 = nn.Conv2D(in_channels=128, channels=128, kernel_size=3, strides=1, padding=1)
-
-        self.conv3_1 = nn.Conv2D(in_channels=128, channels=256, kernel_size=3, strides=1, padding=1)
-        self.conv3_2 = nn.Conv2D(in_channels=256, channels=256, kernel_size=3, strides=1, padding=1)
-        self.conv3_3 = nn.Conv2D(in_channels=256, channels=256, kernel_size=3, strides=1, padding=1)
-
-        self.conv4_1 = nn.Conv2D(in_channels=256, channels=512, kernel_size=3, strides=1, padding=1)
-        self.conv4_2 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
-        self.conv4_3 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
-
-        self.conv5_1 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
-        self.conv5_2 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
-        self.conv5_3 = nn.Conv2D(in_channels=512, channels=512, kernel_size=3, strides=1, padding=1)
-
-    def forward(self, X):
-        h = F.Activation(self.conv1_1(X), act_type='relu')
-        h = F.Activation(self.conv1_2(h), act_type='relu')
-        relu1_2 = h
-        h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
-
-        h = F.Activation(self.conv2_1(h), act_type='relu')
-        h = F.Activation(self.conv2_2(h), act_type='relu')
-        relu2_2 = h
-        h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
-
-        h = F.Activation(self.conv3_1(h), act_type='relu')
-        h = F.Activation(self.conv3_2(h), act_type='relu')
-        h = F.Activation(self.conv3_3(h), act_type='relu')
-        relu3_3 = h
-        h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
-
-        h = F.Activation(self.conv4_1(h), act_type='relu')
-        h = F.Activation(self.conv4_2(h), act_type='relu')
-        h = F.Activation(self.conv4_3(h), act_type='relu')
-        relu4_3 = h
-
-        return [relu1_2, relu2_2, relu3_3, relu4_3]
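The style statistics in the deleted `net.py` are Gram matrices of VGG feature maps, computed with the legacy ndarray API. As a point of comparison only, a minimal sketch of the same computation with the NumPy-style API used elsewhere in this commit (shapes and names are illustrative, not part of the example):

```python
import mxnet as mx

def gram_matrix_np(y):
    """Gram matrix of a (batch, channels, height, width) feature map."""
    b, ch, h, w = y.shape
    features = y.reshape((b, ch, h * w))
    # batched (b, ch, hw) x (b, hw, ch) -> (b, ch, ch), normalized by layer size
    return mx.npx.batch_dot(features, features, transpose_b=True) / (ch * h * w)

x = mx.np.random.uniform(size=(2, 64, 32, 32))
print(gram_matrix_np(x).shape)  # (2, 64, 64)
```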
diff --git a/example/gluon/style_transfer/option.py b/example/gluon/style_transfer/option.py
deleted file mode 100644
index 5faa522..0000000
--- a/example/gluon/style_transfer/option.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import argparse
-import os
-
-class Options():
-    def __init__(self):
-        self.parser = argparse.ArgumentParser(description="parser for MXNet-Gluon-Style-Transfer")
-        subparsers = self.parser.add_subparsers(title="subcommands", dest="subcommand")
-
-        # training args
-        train_arg = subparsers.add_parser("train",
-                                    help="parser for training arguments")
-        train_arg.add_argument("--ngf", type=int, default=128,
-                                help="number of generator filter channels, default 128")
-        train_arg.add_argument("--epochs", type=int, default=4,
-                                help="number of training epochs, default is 4")
-        train_arg.add_argument("--batch-size", type=int, default=4,
-                                help="batch size for training, default is 4")
-        train_arg.add_argument("--dataset", type=str, default="dataset/",
-                                help="path to training dataset, the path should point to a folder "
-                                "containing another folder with all the training images")
-        train_arg.add_argument("--style-folder", type=str, default="images/styles/",
-                                help="path to style-folder")
-        train_arg.add_argument("--save-model-dir", type=str, default="models/",
-                                help="path to folder where trained model will be saved.")
-        train_arg.add_argument("--image-size", type=int, default=256,
-                                help="size of training images, default is 256 X 256")
-        train_arg.add_argument("--style-size", type=int, default=512,
-                                help="size of style-image, default is the original size of style image")
-        train_arg.add_argument("--cuda", type=int, default=1, 
-                                help="set it to 1 for running on GPU, 0 for CPU")
-        train_arg.add_argument("--seed", type=int, default=42, 
-                                help="random seed for training")
-        train_arg.add_argument("--content-weight", type=float, default=1.0,
-                                help="weight for content-loss, default is 1.0")
-        train_arg.add_argument("--style-weight", type=float, default=5.0,
-                                help="weight for style-loss, default is 5.0")
-        train_arg.add_argument("--lr", type=float, default=1e-3,
-                                help="learning rate, default is 0.001")
-        train_arg.add_argument("--log-interval", type=int, default=500,
-                                help="number of images after which the training loss is logged, default is 500")
-        train_arg.add_argument("--resume", type=str, default=None,
-                                help="resume if needed")
-
-        # optim args (Gatys CVPR 2016)
-        optim_arg = subparsers.add_parser("optim",
-                                    help="parser for optimization arguments")
-        optim_arg.add_argument("--iters", type=int, default=500,
-                                help="number of training iterations, default is 500")
-        optim_arg.add_argument("--content-image", type=str, default="images/content/venice-boat.jpg",
-                                help="path to content image you want to stylize")
-        optim_arg.add_argument("--style-image", type=str, default="images/9styles/candy.jpg",
-                                help="path to style-image")
-        optim_arg.add_argument("--content-size", type=int, default=512,
-                                help="factor for scaling down the content image")
-        optim_arg.add_argument("--style-size", type=int, default=512,
-                                help="size of style-image, default is the original size of style image")
-        optim_arg.add_argument("--output-image", type=str, default="output.jpg",
-                                help="path for saving the output image")
-        optim_arg.add_argument("--cuda", type=int, default=1, 
-                                help="set it to 1 for running on GPU, 0 for CPU")
-        optim_arg.add_argument("--content-weight", type=float, default=1.0,
-                                help="weight for content-loss, default is 1.0")
-        optim_arg.add_argument("--style-weight", type=float, default=5.0,
-                                help="weight for style-loss, default is 5.0")
-        optim_arg.add_argument("--lr", type=float, default=1e1,
-                                help="learning rate, default is 10.0")
-        optim_arg.add_argument("--log-interval", type=int, default=50,
-                                help="number of images after which the training loss is logged, default is 50")    
-
-        # evaluation args
-        eval_arg = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments")
-        eval_arg.add_argument("--ngf", type=int, default=128,
-                                help="number of generator filter channels, default 128")
-        eval_arg.add_argument("--content-image", type=str, required=True,
-                                help="path to content image you want to stylize")
-        eval_arg.add_argument("--style-image", type=str, default="images/9styles/candy.jpg",
-                                help="path to style-image")
-        eval_arg.add_argument("--content-size", type=int, default=512,
-                                help="factor for scaling down the content image")
-        eval_arg.add_argument("--style-size", type=int, default=512,
-                                help="size of style-image, default is the original size of style image")
-        eval_arg.add_argument("--style-folder", type=str, default="images/9styles/",
-                                help="path to style-folder")
-        eval_arg.add_argument("--output-image", type=str, default="output.jpg",
-                                help="path for saving the output image")
-        eval_arg.add_argument("--model", type=str, required=True,
-                                help="saved model to be used for stylizing the image")
-        eval_arg.add_argument("--cuda", type=int, default=1,
-                                help="set it to 1 for running on GPU, 0 for CPU")    
-
-    def parse(self):
-        return self.parser.parse_args()
diff --git a/example/gluon/style_transfer/utils.py b/example/gluon/style_transfer/utils.py
deleted file mode 100644
index f869512..0000000
--- a/example/gluon/style_transfer/utils.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import collections
-import os
-import numbers
-from PIL import Image
-
-import numpy as np
-import mxnet as mx
-import mxnet.ndarray as F
-
-
-def tensor_load_rgbimage(filename, ctx, size=None, scale=None, keep_asp=False):
-    img = Image.open(filename).convert('RGB')
-    if size is not None:
-        if keep_asp:
-            size2 = int(size * 1.0 / img.size[0] * img.size[1])
-            img = img.resize((size, size2), Image.ANTIALIAS)
-        else:
-            img = img.resize((size, size), Image.ANTIALIAS)
-
-    elif scale is not None:
-        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
-    img = np.array(img).transpose(2, 0, 1).astype(float)
-    img = F.expand_dims(mx.nd.array(img, ctx=ctx), 0)
-    return img
-
-
-def tensor_save_rgbimage(img, filename, cuda=False):
-    img = F.clip(img, 0, 255).asnumpy()
-    img = img.transpose(1, 2, 0).astype('uint8')
-    img = Image.fromarray(img)
-    img.save(filename)
-
-
-def tensor_save_bgrimage(tensor, filename, cuda=False):
-    (b, g, r) = F.split(tensor, num_outputs=3, axis=0)
-    tensor = F.concat(r, g, b, dim=0)
-    tensor_save_rgbimage(tensor, filename, cuda)
-
-
-def subtract_imagenet_mean_batch(batch):
-    """Subtract ImageNet mean pixel-wise from a BGR image."""
-    batch = F.swapaxes(batch,0, 1)
-    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
-    r = r - 123.680
-    g = g - 116.779
-    b = b - 103.939
-    batch = F.concat(r, g, b, dim=0)
-    batch = F.swapaxes(batch,0, 1)
-    return batch
-
-
-def subtract_imagenet_mean_preprocess_batch(batch):
-    """Subtract ImageNet mean pixel-wise from a BGR image."""
-    batch = F.swapaxes(batch,0, 1)
-    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
-    r = r - 123.680
-    g = g - 116.779
-    b = b - 103.939
-    batch = F.concat(b, g, r, dim=0)
-    batch = F.swapaxes(batch,0, 1)
-    return batch
-
-
-def add_imagenet_mean_batch(batch):
-    batch = F.swapaxes(batch,0, 1)
-    (b, g, r) = F.split(batch, num_outputs=3, axis=0)
-    r = r + 123.680
-    g = g + 116.779
-    b = b + 103.939
-    batch = F.concat(b, g, r, dim=0)
-    batch = F.swapaxes(batch,0, 1)
-    # batch = denormalizer(batch)
-    return batch
-
-
-def imagenet_clamp_batch(batch, low, high):
-    """ Not necessary in practice """
-    F.clip(batch[:,0,:,:],low-123.680, high-123.680)
-    F.clip(batch[:,1,:,:],low-116.779, high-116.779)
-    F.clip(batch[:,2,:,:],low-103.939, high-103.939)
-
-
-def preprocess_batch(batch):
-    batch = F.swapaxes(batch, 0, 1)
-    (r, g, b) = F.split(batch, num_outputs=3, axis=0)
-    batch = F.concat(b, g, r, dim=0)
-    batch = F.swapaxes(batch, 0, 1)
-    return batch
-
-
-class ToTensor(object):
-    def __init__(self, ctx):
-        self.ctx = ctx
-
-    def __call__(self, img):
-        img = mx.nd.array(np.array(img).transpose(2, 0, 1).astype('float32'), ctx=self.ctx)
-        return img
-
-
-class Compose(object):
-    """Composes several transforms together.
-    Args:
-        transforms (list of ``Transform`` objects): list of transforms to compose.
-    Example:
-        >>> transforms.Compose([
-        >>>     transforms.CenterCrop(10),
-        >>>     transforms.ToTensor(),
-        >>> ])
-    """
-
-    def __init__(self, transforms):
-        self.transforms = transforms
-
-    def __call__(self, img):
-        for t in self.transforms:
-            img = t(img)
-        return img
-
-
-class Scale(object):
-    """Rescale the input PIL.Image to the given size.
-    Args:
-        size (sequence or int): Desired output size. If size is a sequence like
-            (w, h), output size will be matched to this. If size is an int,
-            smaller edge of the image will be matched to this number.
-            i.e, if height > width, then image will be rescaled to
-            (size * height / width, size)
-        interpolation (int, optional): Desired interpolation. Default is
-            ``PIL.Image.BILINEAR``
-    """
-
-    def __init__(self, size, interpolation=Image.BILINEAR):
-        assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)
-        self.size = size
-        self.interpolation = interpolation
-
-    def __call__(self, img):
-        """
-        Args:
-            img (PIL.Image): Image to be scaled.
-        Returns:
-            PIL.Image: Rescaled image.
-        """
-        if isinstance(self.size, int):
-            w, h = img.size
-            if (w <= h and w == self.size) or (h <= w and h == self.size):
-                return img
-            if w < h:
-                ow = self.size
-                oh = int(self.size * h / w)
-                return img.resize((ow, oh), self.interpolation)
-            else:
-                oh = self.size
-                ow = int(self.size * w / h)
-                return img.resize((ow, oh), self.interpolation)
-        else:
-            return img.resize(self.size, self.interpolation)
-
-
-class CenterCrop(object):
-    """Crops the given PIL.Image at the center.
-    Args:
-        size (sequence or int): Desired output size of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
-    """
-
-    def __init__(self, size):
-        if isinstance(size, numbers.Number):
-            self.size = (int(size), int(size))
-        else:
-            self.size = size
-
-    def __call__(self, img):
-        """
-        Args:
-            img (PIL.Image): Image to be cropped.
-        Returns:
-            PIL.Image: Cropped image.
-        """
-        w, h = img.size
-        th, tw = self.size
-        x1 = int(round((w - tw) / 2.))
-        y1 = int(round((h - th) / 2.))
-        return img.crop((x1, y1, x1 + tw, y1 + th))
-
-
-class StyleLoader():
-    def __init__(self, style_folder, style_size, ctx):
-        self.folder = style_folder
-        self.style_size = style_size
-        self.files = os.listdir(style_folder)
-        assert(len(self.files) > 0)
-        self.ctx = ctx
-
-    def get(self, i):
-        idx = i%len(self.files)
-        filepath = os.path.join(self.folder, self.files[idx])
-        style = tensor_load_rgbimage(filepath, self.ctx, self.style_size)
-        return style
-
-    def size(self):
-        return len(self.files)
-
-def init_vgg_params(vgg, model_folder, ctx):
-    if not os.path.exists(os.path.join(model_folder, 'mxvgg.params')):
-        os.system('wget https://www.dropbox.com/s/7c92s0guekwrwzf/mxvgg.params?dl=1 -O' + os.path.join(model_folder, 'mxvgg.params'))
-    vgg.collect_params().load(os.path.join(model_folder, 'mxvgg.params'), ctx=ctx)
-    for param in vgg.collect_params().values():
-        param.grad_req = 'null'
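The deleted `utils.py` helpers revolve around two small operations: subtracting the per-channel ImageNet mean and flipping RGB to BGR for the VGG weights. A compressed, illustrative sketch of that preprocessing with `mx.np` (not part of this commit; the function name is hypothetical):

```python
import mxnet as mx

IMAGENET_MEAN_RGB = mx.np.array([123.680, 116.779, 103.939]).reshape((1, 3, 1, 1))

def subtract_mean_and_to_bgr(batch):
    """batch: (N, 3, H, W) RGB image batch with pixel values in [0, 255]."""
    batch = batch - IMAGENET_MEAN_RGB     # per-channel ImageNet mean subtraction
    return mx.np.flip(batch, axis=1)      # RGB -> BGR channel order
```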
diff --git a/example/gluon/super_resolution/super_resolution.py b/example/gluon/super_resolution/super_resolution.py
index 52bfc22..7553516 100644
--- a/example/gluon/super_resolution/super_resolution.py
+++ b/example/gluon/super_resolution/super_resolution.py
@@ -30,7 +30,6 @@ import numpy as np
 import mxnet as mx
 from mxnet import gluon, autograd as ag
 from mxnet.gluon import nn
-from mxnet.gluon.contrib import nn as contrib_nn
 from mxnet.image import CenterCropAug, ResizeAug
 from mxnet.io import PrefetchingIter
 from mxnet.test_utils import download
@@ -133,21 +132,20 @@ def get_dataset(prefetch=False):
 
 train_data, val_data = get_dataset()
 
-mx.random.seed(opt.seed)
+mx.np.random.seed(opt.seed)
 ctx = [mx.gpu(0)] if opt.use_gpu else [mx.cpu()]
 
 
 class SuperResolutionNet(gluon.HybridBlock):
     def __init__(self, upscale_factor):
         super(SuperResolutionNet, self).__init__()
-        with self.name_scope():
-            self.conv1 = nn.Conv2D(64, (5, 5), strides=(1, 1), padding=(2, 2), activation='relu')
-            self.conv2 = nn.Conv2D(64, (3, 3), strides=(1, 1), padding=(1, 1), activation='relu')
-            self.conv3 = nn.Conv2D(32, (3, 3), strides=(1, 1), padding=(1, 1), activation='relu')
-            self.conv4 = nn.Conv2D(upscale_factor ** 2, (3, 3), strides=(1, 1), padding=(1, 1))
-            self.pxshuf = contrib_nn.PixelShuffle2D(upscale_factor)
-
-    def hybrid_forward(self, F, x):
+        self.conv1 = nn.Conv2D(64, (5, 5), strides=(1, 1), padding=(2, 2), activation='relu')
+        self.conv2 = nn.Conv2D(64, (3, 3), strides=(1, 1), padding=(1, 1), activation='relu')
+        self.conv3 = nn.Conv2D(32, (3, 3), strides=(1, 1), padding=(1, 1), activation='relu')
+        self.conv4 = nn.Conv2D(upscale_factor ** 2, (3, 3), strides=(1, 1), padding=(1, 1))
+        self.pxshuf = nn.PixelShuffle2D(upscale_factor)
+
+    def forward(self, x):
         x = self.conv1(x)
         x = self.conv2(x)
         x = self.conv3(x)
@@ -219,8 +217,8 @@ def resolve(ctx):
     net.load_parameters(path.join(this_dir, 'superres.params'), ctx=ctx)
     img = Image.open(opt.resolve_img).convert('YCbCr')
     y, cb, cr = img.split()
-    data = mx.nd.expand_dims(mx.nd.expand_dims(mx.nd.array(y), axis=0), axis=0)
-    out_img_y = mx.nd.reshape(net(data), shape=(-3, -2)).asnumpy()
+    data = mx.np.expand_dims(mx.np.expand_dims(mx.np.array(y), axis=0), axis=0)
+    out_img_y = mx.np.reshape(net(data), shape=(-3, -2)).asnumpy()
     out_img_y = out_img_y.clip(0, 255)
     out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L')
 
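The `super_resolution.py` hunk above shows the migration pattern applied throughout these examples: `name_scope()` is dropped, `hybrid_forward(self, F, x)` becomes `forward(self, x)`, and `mx.nd` calls move to `mx.np`. A minimal, self-contained sketch of that style (the block and shapes are illustrative, not taken from the example):

```python
import mxnet as mx
from mxnet.gluon import nn

class TinyNet(nn.HybridBlock):
    def __init__(self):
        super().__init__()
        self.dense = nn.Dense(10)       # children registered directly, no name_scope()

    def forward(self, x):               # replaces hybrid_forward(self, F, x)
        return self.dense(x)

net = TinyNet()
net.initialize()
net.hybridize()
print(net(mx.np.zeros((2, 4))).shape)   # (2, 10), NumPy-style arrays throughout
```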
diff --git a/example/gluon/tree_lstm/LICENSE b/example/gluon/tree_lstm/LICENSE
deleted file mode 100644
index 441cb8a..0000000
--- a/example/gluon/tree_lstm/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2017 Riddhiman Dasgupta, Sheng Zha
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/example/gluon/tree_lstm/README.md b/example/gluon/tree_lstm/README.md
deleted file mode 100644
index 8e3b385..0000000
--- a/example/gluon/tree_lstm/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-
-# Tree-Structured Long Short-Term Memory Networks
-This is an [MXNet Gluon](https://mxnet.io/) implementation of Tree-LSTM as described in the paper [Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks](http://arxiv.org/abs/1503.00075) by Kai Sheng Tai, Richard Socher, and Christopher Manning.
-
-### Requirements
-- Python (tested on **3.6.5**, should work on **>=2.7**)
-- Java >= 8 (for Stanford CoreNLP utilities)
-- Other dependencies are in `requirements.txt`
-Note: Currently works with MXNet 1.3.0.
-
-### Usage
-Before delving into how to run the code, here is a quick overview of the contents:
- Use the script `fetch_and_preprocess.sh` to download the [SICK dataset](http://alt.qcri.org/semeval2014/task1/index.php?id=data-and-tools), [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml) and [Stanford POS Tagger](http://nlp.stanford.edu/software/tagger.shtml), and [GloVe word vectors](http://nlp.stanford.edu/projects/glove/) (Common Crawl 840 -- **Warning:** this is a 2GB download!), and additionally preprocess the data, i.e. generate dependency parses using [St [...]
- `main.py` does the actual heavy lifting of training the model and testing it on the SICK dataset. For a list of all command-line arguments, run `python main.py -h`.
- The first run caches GloVe embeddings for the words in the SICK vocabulary; later runs read only this cache.
-
-Next, here is how to run the code to train a TreeLSTM model.
-#### Local Python Environment
-If you have a working Python3 environment, simply run the following sequence of steps:
-
-```
-bash fetch_and_preprocess.sh
-python main.py
-```
-
-
-### Acknowledgments
-- The Gluon version is ported from this implementation [dasguptar/treelstm.pytorch](https://github.com/dasguptar/treelstm.pytorch)
-- Shout-out to [Kai Sheng Tai](https://github.com/kaishengtai/) for the [original LuaTorch implementation](https://github.com/stanfordnlp/treelstm), and to the [Pytorch team](https://github.com/pytorch/pytorch#the-team) for the fun library.
diff --git a/example/gluon/tree_lstm/dataset.cPickle b/example/gluon/tree_lstm/dataset.cPickle
deleted file mode 100644
index bdfca53..0000000
Binary files a/example/gluon/tree_lstm/dataset.cPickle and /dev/null differ
diff --git a/example/gluon/tree_lstm/dataset.py b/example/gluon/tree_lstm/dataset.py
deleted file mode 100644
index 5d6b766..0000000
--- a/example/gluon/tree_lstm/dataset.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import logging
-import os
-import random
-
-import numpy as np
-
-import mxnet as mx
-from tqdm import tqdm
-
-logging.basicConfig(level=logging.INFO)
-
-
-class Vocab(object):
-    # constants for special tokens: padding, unknown, and beginning/end of sentence.
-    PAD = 0
-    UNK = 1
-    BOS = 2
-    EOS = 3
-    PAD_WORD = '<blank>'
-    UNK_WORD = '<unk>'
-    BOS_WORD = '<s>'
-    EOS_WORD = '</s>'
-
-    def __init__(self, filepaths=[], embedpath=None, include_unseen=False, lower=False):
-        self.idx2tok = []
-        self.tok2idx = {}
-        self.lower = lower
-        self.include_unseen = include_unseen
-
-        self.add(Vocab.PAD_WORD)
-        self.add(Vocab.UNK_WORD)
-        self.add(Vocab.BOS_WORD)
-        self.add(Vocab.EOS_WORD)
-
-        self.embed = None
-
-        for filename in filepaths:
-            logging.info('loading %s'%filename)
-            with open(filename, 'r') as f:
-                self.load_file(f)
-        if embedpath is not None:
-            logging.info('loading %s'%embedpath)
-            with open(embedpath, 'r') as f:
-                self.load_embedding(f, reset=set([Vocab.PAD_WORD, Vocab.UNK_WORD, Vocab.BOS_WORD,
-                                                  Vocab.EOS_WORD]))
-
-    @property
-    def size(self):
-        return len(self.idx2tok)
-
-    def get_index(self, key):
-        return self.tok2idx.get(key.lower() if self.lower else key,
-                                Vocab.UNK)
-
-    def get_token(self, idx):
-        if idx < self.size:
-            return self.idx2tok[idx]
-        else:
-            return Vocab.UNK_WORD
-
-    def add(self, token):
-        token = token.lower() if self.lower else token
-        if token in self.tok2idx:
-            idx = self.tok2idx[token]
-        else:
-            idx = len(self.idx2tok)
-            self.idx2tok.append(token)
-            self.tok2idx[token] = idx
-        return idx
-
-    def to_indices(self, tokens, add_bos=False, add_eos=False):
-        vec = [Vocab.BOS] if add_bos else []
-        vec += [self.get_index(token) for token in tokens]
-        if add_eos:
-            vec.append(Vocab.EOS)
-        return vec
-
-    def to_tokens(self, indices, stop):
-        tokens = []
-        for i in indices:
-            tokens += [self.get_token(i)]
-            if i == stop:
-                break
-        return tokens
-
-    def load_file(self, f):
-        for line in f:
-            tokens = line.rstrip('\n').split()
-            for token in tokens:
-                self.add(token)
-
-    def load_embedding(self, f, reset=[]):
-        vectors = {}
-        for line in tqdm(f.readlines(), desc='Loading embeddings'):
-            tokens = line.rstrip('\n').split(' ')
-            word = tokens[0].lower() if self.lower else tokens[0]
-            if self.include_unseen:
-                self.add(word)
-            if word in self.tok2idx:
-                vectors[word] = [float(x) for x in tokens[1:]]
-        dim = len(list(vectors.values())[0])
-        def to_vector(tok):
-            if tok in vectors and tok not in reset:
-                return vectors[tok]
-            elif tok not in vectors:
-                return np.random.normal(-0.05, 0.05, size=dim)
-            else:
-                return [0.0]*dim
-        self.embed = mx.nd.array([vectors[tok] if tok in vectors and tok not in reset
-                                  else [0.0]*dim for tok in self.idx2tok])
-
-class Tree(object):
-    def __init__(self, idx):
-        self.children = []
-        self.idx = idx
-
-    def __repr__(self):
-        if self.children:
-            return '{0}: {1}'.format(self.idx, str(self.children))
-        else:
-            return str(self.idx)
-
-# Dataset class for SICK dataset
-class SICKDataIter(object):
-    def __init__(self, path, vocab, num_classes, shuffle=True):
-        super(SICKDataIter, self).__init__()
-        self.vocab = vocab
-        self.num_classes = num_classes
-        self.l_sentences = self.read_sentences(os.path.join(path,'a.toks'))
-        self.r_sentences = self.read_sentences(os.path.join(path,'b.toks'))
-        self.l_trees = self.read_trees(os.path.join(path,'a.parents'))
-        self.r_trees = self.read_trees(os.path.join(path,'b.parents'))
-        self.labels = self.read_labels(os.path.join(path,'sim.txt'))
-        self.size = len(self.labels)
-        self.shuffle = shuffle
-        self.reset()
-
-    def reset(self):
-        if self.shuffle:
-            mask = list(range(self.size))
-            random.shuffle(mask)
-            self.l_sentences = [self.l_sentences[i] for i in mask]
-            self.r_sentences = [self.r_sentences[i] for i in mask]
-            self.l_trees = [self.l_trees[i] for i in mask]
-            self.r_trees = [self.r_trees[i] for i in mask]
-            self.labels = [self.labels[i] for i in mask]
-        self.index = 0
-
-    def next(self):
-        out = self[self.index]
-        self.index += 1
-        return out
-
-    def set_context(self, context):
-        self.l_sentences = [a.as_in_context(context) for a in self.l_sentences]
-        self.r_sentences = [a.as_in_context(context) for a in self.r_sentences]
-
-    def __len__(self):
-        return self.size
-
-    def __getitem__(self, index):
-        l_tree = self.l_trees[index]
-        r_tree = self.r_trees[index]
-        l_sent = self.l_sentences[index]
-        r_sent = self.r_sentences[index]
-        label = self.labels[index]
-        return (l_tree,l_sent,r_tree,r_sent,label)
-
-    def read_sentence(self, line):
-        indices = self.vocab.to_indices(line.split())
-        return mx.nd.array(indices)
-
-    def read_sentences(self, filename):
-        with open(filename,'r') as f:
-            sentences = [self.read_sentence(line) for line in f.readlines()]
-        return sentences
-
-    def read_tree(self, line):
-        parents = [int(x) for x in line.split()]
-        nodes = {}
-        root = None
-        for i in range(1,len(parents)+1):
-            if i-1 not in nodes and parents[i-1]!=-1:
-                idx = i
-                prev = None
-                while True:
-                    parent = parents[idx-1]
-                    if parent == -1:
-                        break
-                    tree = Tree(idx)
-                    if prev is not None:
-                        tree.children.append(prev)
-                    nodes[idx-1] = tree
-                    tree.idx = idx-1
-                    if parent-1 in nodes:
-                        nodes[parent-1].children.append(tree)
-                        break
-                    elif parent==0:
-                        root = tree
-                        break
-                    else:
-                        prev = tree
-                        idx = parent
-        return root
-
-    def read_trees(self, filename):
-        with open(filename,'r') as f:
-            trees = [self.read_tree(line) for line in tqdm(f.readlines(), 'Parsing trees')]
-        return trees
-
-    def read_labels(self, filename):
-        with open(filename,'r') as f:
-            labels = [float(x) for x in f.readlines()]
-        return labels
diff --git a/example/gluon/tree_lstm/fetch_and_preprocess.sh b/example/gluon/tree_lstm/fetch_and_preprocess.sh
deleted file mode 100755
index a9b9d28..0000000
--- a/example/gluon/tree_lstm/fetch_and_preprocess.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-python scripts/download.py
-
-CLASSPATH="lib:lib/stanford-parser/stanford-parser.jar:lib/stanford-parser/stanford-parser-3.5.1-models.jar"
-javac -cp $CLASSPATH lib/*.java
-python scripts/preprocess-sick.py
diff --git a/example/gluon/tree_lstm/lib/CollapseUnaryTransformer.java b/example/gluon/tree_lstm/lib/CollapseUnaryTransformer.java
deleted file mode 100644
index a0ff193..0000000
--- a/example/gluon/tree_lstm/lib/CollapseUnaryTransformer.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.util.List;
-
-import edu.stanford.nlp.ling.Label;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.trees.TreeTransformer;
-import edu.stanford.nlp.util.Generics;
-
-/**
- * This transformer collapses chains of unary nodes so that the top
- * node is the only node left.  The Sentiment model does not handle
- * unary nodes, so this simplifies them to make a binary tree consist
- * entirely of binary nodes and preterminals.  A new tree with new
- * nodes and labels is returned; the original tree is unchanged.
- *
- * @author John Bauer
- */
-public class CollapseUnaryTransformer implements TreeTransformer {
-  public Tree transformTree(Tree tree) {
-    if (tree.isPreTerminal() || tree.isLeaf()) {
-      return tree.deepCopy();
-    }
-
-    Label label = tree.label().labelFactory().newLabel(tree.label());
-    Tree[] children = tree.children();
-    while (children.length == 1 && !children[0].isLeaf()) {
-      children = children[0].children();
-    }
-    List<Tree> processedChildren = Generics.newArrayList();
-    for (Tree child : children) {
-      processedChildren.add(transformTree(child));
-    }
-    return tree.treeFactory().newTreeNode(label, processedChildren);
-  }
-}
diff --git a/example/gluon/tree_lstm/lib/ConstituencyParse.java b/example/gluon/tree_lstm/lib/ConstituencyParse.java
deleted file mode 100644
index 346138c..0000000
--- a/example/gluon/tree_lstm/lib/ConstituencyParse.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import edu.stanford.nlp.process.WordTokenFactory;
-import edu.stanford.nlp.ling.HasWord;
-import edu.stanford.nlp.ling.Word;
-import edu.stanford.nlp.ling.CoreLabel;
-import edu.stanford.nlp.process.PTBTokenizer;
-import edu.stanford.nlp.util.StringUtils;
-import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
-import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
-import edu.stanford.nlp.trees.GrammaticalStructure;
-import edu.stanford.nlp.trees.GrammaticalStructureFactory;
-import edu.stanford.nlp.trees.PennTreebankLanguagePack;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.trees.Trees;
-import edu.stanford.nlp.trees.TreebankLanguagePack;
-import edu.stanford.nlp.trees.TypedDependency;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.StringReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.HashMap;
-import java.util.Properties;
-import java.util.Scanner;
-
-public class ConstituencyParse {
-
-  private boolean tokenize;
-  private BufferedWriter tokWriter, parentWriter;
-  private LexicalizedParser parser;
-  private TreeBinarizer binarizer;
-  private CollapseUnaryTransformer transformer;
-  private GrammaticalStructureFactory gsf;
-
-  private static final String PCFG_PATH = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
-
-  public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException {
-    this.tokenize = tokenize;
-    if (tokPath != null) {
-      tokWriter = new BufferedWriter(new FileWriter(tokPath));
-    }
-    parentWriter = new BufferedWriter(new FileWriter(parentPath));
-    parser = LexicalizedParser.loadModel(PCFG_PATH);
-    binarizer = TreeBinarizer.simpleTreeBinarizer(
-      parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
-    transformer = new CollapseUnaryTransformer();
-
-    // set up to produce dependency representations from constituency trees
-    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
-    gsf = tlp.grammaticalStructureFactory();
-  }
-
-  public List<HasWord> sentenceToTokens(String line) {
-    List<HasWord> tokens = new ArrayList<>();
-    if (tokenize) {
-      PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), "");
-      for (Word label; tokenizer.hasNext(); ) {
-        tokens.add(tokenizer.next());
-      }
-    } else {
-      for (String word : line.split(" ")) {
-        tokens.add(new Word(word));
-      }
-    }
-
-    return tokens;
-  }
-
-  public Tree parse(List<HasWord> tokens) {
-    Tree tree = parser.apply(tokens);
-    return tree;
-  }
-
-  public int[] constTreeParents(Tree tree) {
-    Tree binarized = binarizer.transformTree(tree);
-    Tree collapsedUnary = transformer.transformTree(binarized);
-    Trees.convertToCoreLabels(collapsedUnary);
-    collapsedUnary.indexSpans();
-    List<Tree> leaves = collapsedUnary.getLeaves();
-    int size = collapsedUnary.size() - leaves.size();
-    int[] parents = new int[size];
-    HashMap<Integer, Integer> index = new HashMap<Integer, Integer>();
-
-    int idx = leaves.size();
-    int leafIdx = 0;
-    for (Tree leaf : leaves) {
-      Tree cur = leaf.parent(collapsedUnary); // go to preterminal
-      int curIdx = leafIdx++;
-      boolean done = false;
-      while (!done) {
-        Tree parent = cur.parent(collapsedUnary);
-        if (parent == null) {
-          parents[curIdx] = 0;
-          break;
-        }
-
-        int parentIdx;
-        int parentNumber = parent.nodeNumber(collapsedUnary);
-        if (!index.containsKey(parentNumber)) {
-          parentIdx = idx++;
-          index.put(parentNumber, parentIdx);
-        } else {
-          parentIdx = index.get(parentNumber);
-          done = true;
-        }
-
-        parents[curIdx] = parentIdx + 1;
-        cur = parent;
-        curIdx = parentIdx;
-      }
-    }
-
-    return parents;
-  }
-
-  // convert constituency parse to a dependency representation and return the
-  // parent pointer representation of the tree
-  public int[] depTreeParents(Tree tree, List<HasWord> tokens) {
-    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
-    Collection<TypedDependency> tdl = gs.typedDependencies();
-    int len = tokens.size();
-    int[] parents = new int[len];
-    for (int i = 0; i < len; i++) {
-      // if a node has a parent of -1 at the end of parsing, then the node
-      // has no parent.
-      parents[i] = -1;
-    }
-
-    for (TypedDependency td : tdl) {
-      // let root have index 0
-      int child = td.dep().index();
-      int parent = td.gov().index();
-      parents[child - 1] = parent;
-    }
-
-    return parents;
-  }
-
-  public void printTokens(List<HasWord> tokens) throws IOException {
-    int len = tokens.size();
-    StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < len - 1; i++) {
-      if (tokenize) {
-        sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word()));
-      } else {
-        sb.append(tokens.get(i).word());
-      }
-      sb.append(' ');
-    }
-
-    if (tokenize) {
-      sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word()));
-    } else {
-      sb.append(tokens.get(len - 1).word());
-    }
-
-    sb.append('\n');
-    tokWriter.write(sb.toString());
-  }
-
-  public void printParents(int[] parents) throws IOException {
-    StringBuilder sb = new StringBuilder();
-    int size = parents.length;
-    for (int i = 0; i < size - 1; i++) {
-      sb.append(parents[i]);
-      sb.append(' ');
-    }
-    sb.append(parents[size - 1]);
-    sb.append('\n');
-    parentWriter.write(sb.toString());
-  }
-
-  public void close() throws IOException {
-    if (tokWriter != null) tokWriter.close();
-    parentWriter.close();
-  }
-
-  public static void main(String[] args) throws Exception {
-    Properties props = StringUtils.argsToProperties(args);
-    if (!props.containsKey("parentpath")) {
-      System.err.println(
-        "usage: java ConstituencyParse -deps - -tokenize - -tokpath <tokpath> -parentpath <parentpath>");
-      System.exit(1);
-    }
-
-    // whether to tokenize input sentences
-    boolean tokenize = false;
-    if (props.containsKey("tokenize")) {
-      tokenize = true;
-    }
-
-    // whether to produce dependency trees from the constituency parse
-    boolean deps = false;
-    if (props.containsKey("deps")) {
-      deps = true;
-    }
-
-    String tokPath = props.containsKey("tokpath") ? props.getProperty("tokpath") : null;
-    String parentPath = props.getProperty("parentpath");
-    ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tokenize);
-
-    Scanner stdin = new Scanner(System.in);
-    int count = 0;
-    long start = System.currentTimeMillis();
-    while (stdin.hasNextLine()) {
-      String line = stdin.nextLine();
-      List<HasWord> tokens = processor.sentenceToTokens(line);
-      Tree parse = processor.parse(tokens);
-
-      // produce parent pointer representation
-      int[] parents = deps ? processor.depTreeParents(parse, tokens)
-                           : processor.constTreeParents(parse);
-
-      // print
-      if (tokPath != null) {
-        processor.printTokens(tokens);
-      }
-      processor.printParents(parents);
-
-      count++;
-      if (count % 1000 == 0) {
-        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
-        System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
-      }
-    }
-
-    long totalTimeMillis = System.currentTimeMillis() - start;
-    System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n",
-      count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count);
-    processor.close();
-  }
-}
diff --git a/example/gluon/tree_lstm/lib/DependencyParse.java b/example/gluon/tree_lstm/lib/DependencyParse.java
deleted file mode 100644
index 445cab8..0000000
--- a/example/gluon/tree_lstm/lib/DependencyParse.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import edu.stanford.nlp.process.WordTokenFactory;
-import edu.stanford.nlp.ling.HasWord;
-import edu.stanford.nlp.ling.Word;
-import edu.stanford.nlp.ling.TaggedWord;
-import edu.stanford.nlp.parser.nndep.DependencyParser;
-import edu.stanford.nlp.process.PTBTokenizer;
-import edu.stanford.nlp.trees.TypedDependency;
-import edu.stanford.nlp.util.StringUtils;
-import edu.stanford.nlp.tagger.maxent.MaxentTagger;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Properties;
-import java.util.Scanner;
-
-public class DependencyParse {
-
-  public static final String TAGGER_MODEL = "stanford-tagger/models/english-left3words-distsim.tagger";
-  public static final String PARSER_MODEL = "edu/stanford/nlp/models/parser/nndep/english_SD.gz";
-
-  public static void main(String[] args) throws Exception {
-    Properties props = StringUtils.argsToProperties(args);
-    if (!props.containsKey("tokpath") ||
-        !props.containsKey("parentpath") ||
-        !props.containsKey("relpath")) {
-      System.err.println(
-        "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>");
-      System.exit(1);
-    }
-
-    boolean tokenize = false;
-    if (props.containsKey("tokenize")) {
-      tokenize = true;
-    }
-
-    String tokPath = props.getProperty("tokpath");
-    String parentPath = props.getProperty("parentpath");
-    String relPath = props.getProperty("relpath");
-
-    BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath));
-    BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath));
-    BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath));
-
-    MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL);
-    DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL);
-    Scanner stdin = new Scanner(System.in);
-    int count = 0;
-    long start = System.currentTimeMillis();
-    while (stdin.hasNextLine()) {
-      String line = stdin.nextLine();
-      List<HasWord> tokens = new ArrayList<>();
-      if (tokenize) {
-        PTBTokenizer<Word> tokenizer = new PTBTokenizer(
-          new StringReader(line), new WordTokenFactory(), "");
-        for (Word label; tokenizer.hasNext(); ) {
-          tokens.add(tokenizer.next());
-        }
-      } else {
-        for (String word : line.split(" ")) {
-          tokens.add(new Word(word));
-        }
-      }
-
-      List<TaggedWord> tagged = tagger.tagSentence(tokens);
-
-      int len = tagged.size();
-      Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies();
-      int[] parents = new int[len];
-      for (int i = 0; i < len; i++) {
-        // if a node has a parent of -1 at the end of parsing, then the node
-        // has no parent.
-        parents[i] = -1;
-      }
-
-      String[] relns = new String[len];
-      for (TypedDependency td : tdl) {
-        // let root have index 0
-        int child = td.dep().index();
-        int parent = td.gov().index();
-        relns[child - 1] = td.reln().toString();
-        parents[child - 1] = parent;
-      }
-
-      // print tokens
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < len - 1; i++) {
-        if (tokenize) {
-          sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word()));
-        } else {
-          sb.append(tokens.get(i).word());
-        }
-        sb.append(' ');
-      }
-      if (tokenize) {
-        sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word()));
-      } else {
-        sb.append(tokens.get(len - 1).word());
-      }
-      sb.append('\n');
-      tokWriter.write(sb.toString());
-
-      // print parent pointers
-      sb = new StringBuilder();
-      for (int i = 0; i < len - 1; i++) {
-        sb.append(parents[i]);
-        sb.append(' ');
-      }
-      sb.append(parents[len - 1]);
-      sb.append('\n');
-      parentWriter.write(sb.toString());
-
-      // print relations
-      sb = new StringBuilder();
-      for (int i = 0; i < len - 1; i++) {
-        sb.append(relns[i]);
-        sb.append(' ');
-      }
-      sb.append(relns[len - 1]);
-      sb.append('\n');
-      relWriter.write(sb.toString());
-
-      count++;
-      if (count % 1000 == 0) {
-        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
-        System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
-      }
-    }
-
-    long totalTimeMillis = System.currentTimeMillis() - start;
-    System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n",
-      count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count);
-    tokWriter.close();
-    parentWriter.close();
-    relWriter.close();
-  }
-}
diff --git a/example/gluon/tree_lstm/main.py b/example/gluon/tree_lstm/main.py
deleted file mode 100644
index 41e4f4f..0000000
--- a/example/gluon/tree_lstm/main.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This example is inspired by https://github.com/dasguptar/treelstm.pytorch
-import argparse, math, os, random
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-import logging
-logging.basicConfig(level=logging.INFO)
-import numpy as np
-from tqdm import tqdm
-
-import mxnet as mx
-from mxnet import gluon
-from mxnet.gluon import nn
-from mxnet import autograd as ag
-
-from tree_lstm import SimilarityTreeLSTM
-from dataset import Vocab, SICKDataIter
-
-parser = argparse.ArgumentParser(description='TreeLSTM for Sentence Similarity on Dependency Trees')
-parser.add_argument('--data', default='data/sick/',
-                    help='path to raw dataset. required when preprocessed dataset is not available.')
-parser.add_argument('--word_embed', default='data/glove/glove.840B.300d.txt',
-                    help='directory with word embeddings. required when preprocessed dataset is not available.')
-parser.add_argument('--batch_size', type=int, default=25,
-                    help='training batch size per device (CPU/GPU).')
-parser.add_argument('--epochs', default=50, type=int,
-                    help='number of total epochs to run')
-parser.add_argument('--lr', default=0.02, type=float,
-                    help='initial learning rate')
-parser.add_argument('--wd', default=0.0001, type=float,
-                    help='weight decay factor')
-parser.add_argument('--optimizer', default='adagrad',
-                    help='optimizer (default: adagrad)')
-parser.add_argument('--seed', default=123, type=int,
-                    help='random seed (default: 123)')
-parser.add_argument('--use-gpu', action='store_true',
-                    help='whether to use GPU.')
-
-opt = parser.parse_args()
-
-logging.info(opt)
-
-context = [mx.gpu(0) if opt.use_gpu else mx.cpu()]
-
-rnn_hidden_size, sim_hidden_size, num_classes = 150, 50, 5
-optimizer = opt.optimizer.lower()
-
-mx.random.seed(opt.seed)
-np.random.seed(opt.seed)
-random.seed(opt.seed)
-
-batch_size = opt.batch_size
-
-# read dataset
-if os.path.exists('dataset.pickle'):
-    with open('dataset.pickle', 'rb') as f:
-        train_iter, dev_iter, test_iter, vocab = pickle.load(f)
-else:
-    root_dir = opt.data
-    segments = ['train', 'dev', 'test']
-    token_files = [os.path.join(root_dir, seg, '%s.toks'%tok)
-                   for tok in ['a', 'b']
-                   for seg in segments]
-
-    vocab = Vocab(filepaths=token_files, embedpath=opt.word_embed)
-
-    train_iter, dev_iter, test_iter = [SICKDataIter(os.path.join(root_dir, segment), vocab, num_classes)
-                                       for segment in segments]
-    with open('dataset.pickle', 'wb') as f:
-        pickle.dump([train_iter, dev_iter, test_iter, vocab], f)
-
-logging.info('==> SICK vocabulary size : %d ' % vocab.size)
-logging.info('==> Size of train data   : %d ' % len(train_iter))
-logging.info('==> Size of dev data     : %d ' % len(dev_iter))
-logging.info('==> Size of test data    : %d ' % len(test_iter))
-
-# get network
-net = SimilarityTreeLSTM(sim_hidden_size, rnn_hidden_size, vocab.size, vocab.embed.shape[1], num_classes)
-
-# use pearson correlation and mean-square error for evaluation
-metric = mx.gluon.metric.create(['pearsonr', 'mse'])
-
-def to_target(x):
-    target = np.zeros((1, num_classes))
-    ceil = int(math.ceil(x))
-    floor = int(math.floor(x))
-    if ceil==floor:
-        target[0][floor-1] = 1
-    else:
-        target[0][floor-1] = ceil - x
-        target[0][ceil-1] = x - floor
-    return mx.nd.array(target)
-
-def to_score(x):
-    levels = mx.nd.arange(1, 6, ctx=x.context)
-    return [mx.nd.sum(levels*mx.nd.exp(x), axis=1).reshape((-1,1))]
-
-# when evaluating in validation mode, check and see if pearson-r is improved
-# if so, checkpoint and run evaluation on test dataset
-def test(ctx, data_iter, best, mode='validation', num_iter=-1):
-    data_iter.reset()
-    batches = len(data_iter)
-    data_iter.set_context(ctx[0])
-    preds = []
-    labels = [mx.nd.array(data_iter.labels, ctx=ctx[0]).reshape((-1,1))]
-    for _ in tqdm(range(batches), desc='Testing in {} mode'.format(mode)):
-        l_tree, l_sent, r_tree, r_sent, label = data_iter.next()
-        z = net(mx.nd, l_sent, r_sent, l_tree, r_tree)
-        preds.append(z)
-
-    preds = to_score(mx.nd.concat(*preds, dim=0))
-    metric.update(preds, labels)
-    names, values = metric.get()
-    metric.reset()
-    for name, acc in zip(names, values):
-        logging.info(mode+' acc: %s=%f'%(name, acc))
-        if name == 'pearsonr':
-            test_r = acc
-    if mode == 'validation' and num_iter >= 0:
-        if test_r >= best:
-            best = test_r
-            logging.info('New optimum found: {}. Checkpointing.'.format(best))
-            net.save_parameters('childsum_tree_lstm_{}.params'.format(num_iter))
-            test(ctx, test_iter, -1, 'test')
-        return best
-
-
-def train(epoch, ctx, train_data, dev_data):
-
-    # initialization with context
-    if isinstance(ctx, mx.Context):
-        ctx = [ctx]
-    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx[0])
-    net.embed.weight.set_data(vocab.embed.as_in_context(ctx[0]))
-    train_data.set_context(ctx[0])
-    dev_data.set_context(ctx[0])
-    # set up trainer for optimizing the network.
-    trainer = gluon.Trainer(net.collect_params(), optimizer, {'learning_rate': opt.lr, 'wd': opt.wd})
-
-    best_r = -1
-    Loss = gluon.loss.KLDivLoss()
-    for i in range(epoch):
-        train_data.reset()
-        num_batches = len(train_data)
-        # collect predictions and labels for evaluation metrics
-        preds = []
-        labels = [mx.nd.array(train_data.labels, ctx=ctx[0]).reshape((-1,1))]
-        for j in tqdm(range(num_batches), desc='Training epoch {}'.format(i)):
-            # get next batch
-            l_tree, l_sent, r_tree, r_sent, label = train_data.next()
-            # use autograd to record the forward calculation
-            with ag.record():
-                # forward calculation. the output is log probability
-                z = net(mx.nd, l_sent, r_sent, l_tree, r_tree)
-                # calculate loss
-                loss = Loss(z, to_target(label).as_in_context(ctx[0]))
-                # backward calculation for gradients.
-                loss.backward()
-                preds.append(z)
-            # update weight after every batch_size samples
-            if (j+1) % batch_size == 0:
-                trainer.step(batch_size)
-
-        # translate log-probability to scores, and evaluate
-        preds = to_score(mx.nd.concat(*preds, dim=0))
-        metric.update(preds, labels)
-        names, values = metric.get()
-        metric.reset()
-        for name, acc in zip(names, values):
-            logging.info('training acc at epoch %d: %s=%f'%(i, name, acc))
-        best_r = test(ctx, dev_data, best_r, num_iter=i)
-
-train(opt.epochs, context, train_iter, dev_iter)
diff --git a/example/gluon/tree_lstm/scripts/download.py b/example/gluon/tree_lstm/scripts/download.py
deleted file mode 100644
index 6537ef1..0000000
--- a/example/gluon/tree_lstm/scripts/download.py
+++ /dev/null
@@ -1,106 +0,0 @@
... 6742 lines suppressed ...