Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/01/15 18:28:43 UTC

[GitHub] leleamol closed pull request #9367: Limit the test_nccl to run on 8 GPUs only until NCCL2.1 issue is fixed.

leleamol closed pull request #9367: Limit the test_nccl to run on 8 GPUs only until NCCL2.1 issue is fixed.
URL: https://github.com/apache/incubator-mxnet/pull/9367
 
 
   

This is a pull request from a forked repository. Because GitHub hides the
original diff of a foreign (forked) pull request once it is merged or closed,
the diff is reproduced below for the sake of provenance:

diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
index e28cd65472..19c38bc646 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Visualization.pm
@@ -151,9 +151,9 @@ method print_summary(
         my $cur_param = 0;
         if($op eq 'Convolution')
         {
-            my $num_filter = $node->{attr}{num_filter};
+            my $num_filter = $node->{attrs}{num_filter};
             $cur_param = $pre_filter * $num_filter;
-            while($node->{attr}{kernel} =~ /(\d+)/g)
+            while($node->{attrs}{kernel} =~ /(\d+)/g)
             {
                 $cur_param *= $1;
             }
@@ -161,7 +161,7 @@ method print_summary(
         }
         elsif($op eq 'FullyConnected')
         {
-            $cur_param = $pre_filter * ($node->{attr}{num_hidden} + 1);
+            $cur_param = $pre_filter * ($node->{attrs}{num_hidden} + 1);
         }
         elsif($op eq 'BatchNorm')
         {
@@ -325,15 +325,15 @@ method plot_network(
         }
         elsif($op eq 'Convolution')
         {
-            my @k = $node->{attr}{kernel} =~ /(\d+)/g;
-            my @stride = ($node->{attr}{stride}//'') =~ /(\d+)/g;
+            my @k = $node->{attrs}{kernel} =~ /(\d+)/g;
+            my @stride = ($node->{attrs}{stride}//'') =~ /(\d+)/g;
             $stride[0] //= 1;
-            $label = "Convolution\n".join('x',@k).'/'.join('x',@stride).", $node->{attr}{num_filter}";
+            $label = "Convolution\n".join('x',@k).'/'.join('x',@stride).", $node->{attrs}{num_filter}";
             $attr{fillcolor} = $cm[1];
         }
         elsif($op eq 'FullyConnected')
         {
-            $label = "FullyConnected\n$node->{attr}{num_hidden}";
+            $label = "FullyConnected\n$node->{attrs}{num_hidden}";
             $attr{fillcolor} = $cm[1];
         }
         elsif($op eq 'BatchNorm')
@@ -342,15 +342,15 @@ method plot_network(
         }
         elsif($op eq 'Activation' or $op eq 'LeakyReLU')
         {
-            $label = "$op\n$node->{attr}{act_type}";
+            $label = "$op\n$node->{attrs}{act_type}";
             $attr{fillcolor} = $cm[2];
         }
         elsif($op eq 'Pooling')
         {
-            my @k = $node->{attr}{kernel} =~ /(\d+)/g;
-            my @stride = ($node->{attr}{stride}//'') =~ /(\d+)/g;
+            my @k = $node->{attrs}{kernel} =~ /(\d+)/g;
+            my @stride = ($node->{attrs}{stride}//'') =~ /(\d+)/g;
             $stride[0] //= 1;
-            $label = "Pooling\n$node->{attr}{pool_type}, ".join('x',@k).'/'.join('x',@stride);
+            $label = "Pooling\n$node->{attrs}{pool_type}, ".join('x',@k).'/'.join('x',@stride);
             $attr{fillcolor} = $cm[4];
         }
         elsif($op eq 'Concat' or $op eq 'Flatten' or $op eq 'Reshape')
@@ -366,7 +366,7 @@ method plot_network(
             $attr{fillcolor} = $cm[7];
             if($op eq 'Custom')
             {
-                $label = $node->{attr}{op_type};
+                $label = $node->{attrs}{op_type};
             }
         }
         $dot->graph->add_node($name, label => $label, %attr);
@@ -396,11 +396,11 @@ method plot_network(
                     {
                         my $key = $input_name;
                         $key   .= '_output' if $input_node->{op} ne 'null';
-                        if($input_node->{op} ne 'null' and exists $input_node->{attr})
+                        if($input_node->{op} ne 'null' and exists $input_node->{attrs})
                         {
-                            if(ref $input_node->{attr} eq 'HASH' and exists $input_node->{attr}{num_outputs})
+                            if(ref $input_node->{attrs} eq 'HASH' and exists $input_node->{attrs}{num_outputs})
                             {
-                                $key .= ($input_node->{attr}{num_outputs} - 1);
+                                $key .= ($input_node->{attrs}{num_outputs} - 1);
                             }
                         }
                         my $end = @{ $shape_dict{$key} };
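
The attr-to-attrs rename above follows the symbol JSON layout emitted by recent
MXNet releases, where each graph node stores its operator attributes under an
'attrs' key rather than the older 'attr' key. Below is a minimal Python sketch
(not part of this pull request) of the structure the visualizer parses,
assuming a standard mxnet installation:

    import json
    import mxnet as mx

    # Build a tiny symbol and inspect its serialized graph. Recent MXNet
    # versions keep per-node operator attributes under the "attrs" key,
    # which is what the patched Perl visualizer now reads.
    data = mx.sym.Variable('data')
    conv = mx.sym.Convolution(data=data, kernel=(3, 3), num_filter=64,
                              name='conv0')
    nodes = json.loads(conv.tojson())['nodes']
    conv_node = [n for n in nodes if n['op'] == 'Convolution'][0]
    print(conv_node['attrs'])   # e.g. {'kernel': '(3, 3)', 'num_filter': '64'}
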
diff --git a/perl-package/AI-MXNet/t/test_gluon_rnn.t b/perl-package/AI-MXNet/t/test_gluon_rnn.t
index 13f2293146..83b294d110 100644
--- a/perl-package/AI-MXNet/t/test_gluon_rnn.t
+++ b/perl-package/AI-MXNet/t/test_gluon_rnn.t
@@ -320,15 +320,15 @@ sub test_rnn_layers
     check_rnn_layer_forward(gluon->rnn->GRU(10, 2), mx->nd->ones([8, 3, 20]));
     check_rnn_layer_forward(gluon->rnn->GRU(10, 2), mx->nd->ones([8, 3, 20]), mx->nd->ones([2, 3, 10]));
 
-    my $net = gluon->nn->Sequential();
-    $net->add(gluon->rnn->LSTM(10, 2, bidirectional=>1));
-    $net->add(gluon->nn->BatchNorm(axis=>2));
-    $net->add(gluon->nn->Flatten());
-    $net->add(gluon->nn->Dense(3, activation=>'relu'));
-    $net->collect_params()->initialize();
-    mx->autograd->record(sub {
-        $net->(mx->nd->ones([2, 3, 10]))->backward();
-    });
+#    my $net = gluon->nn->Sequential();
+#    $net->add(gluon->rnn->LSTM(10, 2, bidirectional=>1));
+#    $net->add(gluon->nn->BatchNorm(axis=>2));
+#    $net->add(gluon->nn->Flatten());
+#    $net->add(gluon->nn->Dense(3, activation=>'relu'));
+#    $net->collect_params()->initialize();
+#    mx->autograd->record(sub {
+#        $net->(mx->nd->ones([2, 3, 10]))->backward();
+#    });
 }
 
 test_rnn_layers();
diff --git a/perl-package/AI-MXNet/t/test_loss.t b/perl-package/AI-MXNet/t/test_loss.t
index f98d4f25a3..03875fa20d 100644
--- a/perl-package/AI-MXNet/t/test_loss.t
+++ b/perl-package/AI-MXNet/t/test_loss.t
@@ -230,7 +230,7 @@ sub test_ctc_loss_train
     $mod->fit($data_iter, num_epoch=>200, optimizer_params=>{learning_rate => 1},
             initializer=>mx->init->Xavier(magnitude=>2), eval_metric=>mx->metric->Loss(),
             optimizer=>'adam');
-    ok($mod->score($data_iter, mx->metric->Loss())->{loss} < 10);
+    ok($mod->score($data_iter, mx->metric->Loss())->{loss} < 20);
 }
 
 test_ctc_loss_train();
diff --git a/python/mxnet/contrib/__init__.py b/python/mxnet/contrib/__init__.py
index 2730bc4386..21c77719b7 100644
--- a/python/mxnet/contrib/__init__.py
+++ b/python/mxnet/contrib/__init__.py
@@ -26,3 +26,5 @@
 
 from . import autograd
 from . import tensorboard
+
+from . import text
diff --git a/python/mxnet/contrib/text/__init__.py b/python/mxnet/contrib/text/__init__.py
new file mode 100644
index 0000000000..fff2b94475
--- /dev/null
+++ b/python/mxnet/contrib/text/__init__.py
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+"""This module includes utilities for indexing and embedding text."""
+
+from . import utils
+from . import indexer
+from . import embedding
+from . import glossary
diff --git a/python/mxnet/contrib/text/constants.py b/python/mxnet/contrib/text/constants.py
new file mode 100644
index 0000000000..b69e5d966e
--- /dev/null
+++ b/python/mxnet/contrib/text/constants.py
@@ -0,0 +1,344 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+"""Read text files and load embeddings."""
+from __future__ import absolute_import
+from __future__ import print_function
+
+UNKNOWN_IDX = 0
+
+APACHE_REPO_URL = 'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'
+
+GLOVE_PRETRAINED_FILE_SHA1 = \
+    {'glove.42B.300d.zip': 'f8e722b39578f776927465b71b231bae2ae8776a',
+     'glove.6B.zip': 'b64e54f1877d2f735bdd000c1d7d771e25c7dfdc',
+     'glove.840B.300d.zip': '8084fbacc2dee3b1fd1ca4cc534cbfff3519ed0d',
+     'glove.twitter.27B.zip': 'dce69c404025a8312c323197347695e81fd529fc'}
+
+GLOVE_PRETRAINED_ARCHIVE_SHA1 = \
+    {'glove.42B.300d.txt': '876767977d6bd4d947c0f84d44510677bc94612a',
+     'glove.6B.50d.txt': '21bf566a9d27f84d253e0cd4d4be9dcc07976a6d',
+     'glove.6B.100d.txt': '16b1dbfaf35476790bd9df40c83e2dfbd05312f1',
+     'glove.6B.200d.txt': '17d0355ddaa253e298ede39877d1be70f99d9148',
+     'glove.6B.300d.txt': '646443dd885090927f8215ecf7a677e9f703858d',
+     'glove.840B.300d.txt': '294b9f37fa64cce31f9ebb409c266fc379527708',
+     'glove.twitter.27B.25d.txt':
+         '767d80889d8c8a22ae7cd25e09d0650a6ff0a502',
+     'glove.twitter.27B.50d.txt':
+         '9585f4be97e286339bf0112d0d3aa7c15a3e864d',
+     'glove.twitter.27B.100d.txt':
+         '1bbeab8323c72332bd46ada0fc3c99f2faaa8ca8',
+     'glove.twitter.27B.200d.txt':
+         '7921c77a53aa5977b1d9ce3a7c4430cbd9d1207a'}
+
+FAST_TEXT_FILE_SHA1 = \
+    {'wiki.ab.vec': '9d89a403a9a866d3da8dd8cfab849f59ee499343',
+     'wiki.ace.vec': '85d00074f7a08626f39da6a0c8a5cfa250096ab9',
+     'wiki.ady.vec': '9d17d74f0348224cdebf8a831e61af0825f8952d',
+     'wiki.aa.vec': '5cce30fc85471572c498f278bbe495184577363e',
+     'wiki.af.vec': '999e64bcd8dab8de42cb1feceeca360def35324d',
+     'wiki.ak.vec': '6092b8af335c2dc93e8df2bbf1d715f01e637bb4',
+     'wiki.sq.vec': 'd07ffed553f5eb4756d0a1548a7ba9a51a52f7c6',
+     'wiki.als.vec': '96052e96870695cca50857b5fde5f9f42219139a',
+     'wiki.am.vec': 'dff7fcdd8f5ba0638ab9e1758a89800766156d72',
+     'wiki.ang.vec': 'a7c30e02422d97d23a0701279c5c1c03159130a5',
+     'wiki.ar.vec': 'c46e2142f799cc385bd25f0c0a8943ca565505a4',
+     'wiki.an.vec': '5b4c2b1de5c04e4e0be83841410ca84c47305d21',
+     'wiki.arc.vec': 'fd3ad743103f80cde9cfc048d7ca509e50efb35a',
+     'wiki.hy.vec': '21f9259d04cfd22db446a45d3622af225f00cf20',
+     'wiki.roa_rup.vec': 'e31a44353cd84b976586c8df35a2ab58318120f0',
+     'wiki.as.vec': 'cad5883b5147cbe6cdbf604f65cabdb675a59258',
+     'wiki.ast.vec': '89a90357101953b7c292697fd050c00fe5c38ac5',
+     'wiki.av.vec': '99976a63ca8c4231f808fd4314f0433db35e290d',
+     'wiki.ay.vec': 'be359dad25b2c742d3abfa94c5f5db13f86c730e',
+     'wiki.az.vec': '9581d55d9056ad398a153c37b502f3a07867d091',
+     'wiki.bm.vec': 'f36a19c95e90865f6518d4487e59f363b47bd865',
+     'wiki.bjn.vec': '5f134cf288e8042dcd048a3ee76159aab42c7288',
+     'wiki.map_bms.vec': 'e7deab5fdd38fa3331b1bcb4a16432b38c512e21',
+     'wiki.ba.vec': '22147ee16b2d163cc88d09a035264fd0c10dab68',
+     'wiki.eu.vec': '5e72f4ef93666971fea5d2180b354e0a0821ba91',
+     'wiki.bar.vec': '96130f1f2e5bffdd06c202ad4472e5234020980a',
+     'wiki.be.vec': '6cf81322cd7b046a7f02ec4c4960ad27045383fa',
+     'wiki.bn.vec': '6fc3bfd9af455719f55bee0bea31b11afc70cf06',
+     'wiki.bh.vec': 'ab2d29017afa015c49566a6d9bf75393c23ac4c0',
+     'wiki.bpy.vec': 'c2bb15487c4bdb8fa869772694300ae1fee73896',
+     'wiki.bi.vec': '15785220cd6e6c86cc87e7d3f3322a5541a4fe5d',
+     'wiki.bs.vec': 'c4943a290819ceae1611dd11179b40aab0df0471',
+     'wiki.br.vec': 'df44e16abd2017e2a1b6c6588ee02779b19907f6',
+     'wiki.bug.vec': '942d8f7dadde5faa33aa72862501434f48e29f60',
+     'wiki.bg.vec': '7c1cc6d0c52b038e4b7173259b0c009f242cf486',
+     'wiki.my.vec': 'e7c7989e32b23ca1a9caf534cc65ecaf9e1b9112',
+     'wiki.bxr.vec': 'eaf767690c6b194605ae778719212e3874873d4c',
+     'wiki.zh_yue.vec': 'd2ac1ab9eb1a908797644f83f259c90cb3c1a350',
+     'wiki.ca.vec': 'f5971edee11c939f6a7accfd33a9a45caa54141a',
+     'wiki.ceb.vec': 'b8516a55537b8f80c927d77d95cdf7e4ff849a05',
+     'wiki.bcl.vec': 'd4117b5c443438ddfa608b10a5be2c2501817e7e',
+     'wiki.ch.vec': '46803f3a1734f6a7b0d8cb053bbb86a6915d02e9',
+     'wiki.cbk_zam.vec': '6fef47b4559eec402ce371de20dfb018acd6347d',
+     'wiki.ce.vec': '1d94b0168a773895b23889f7f07d7cf56c11a360',
+     'wiki.chr.vec': '8501bf86b41074ed6c8d15b9209ef7ce83122e70',
+     'wiki.chy.vec': '26c87688551ffe3a0c7a5952e894306651e62131',
+     'wiki.ny.vec': '4e066fe113630fdfbcaf8844cc4ad64388db98d0',
+     'wiki.zh.vec': '117ab34faa80e381641fbabf3a24bc8cfba44050',
+     'wiki.cho.vec': 'cec6778f025fa9ae4134046c6c3a6291bd9c63f9',
+     'wiki.cv.vec': '9cdb0bee5a0fea030def85597dba7108f21b0424',
+     'wiki.zh_classical.vec': '840981c83dd8e5cb02d1cd695e2fe0870941316c',
+     'wiki.kw.vec': 'f9eaa35a7e4f077f6de85c7801f74582f91b52c1',
+     'wiki.co.vec': 'af876a918594e5541207bc12f17bfc4268df7b93',
+     'wiki.cr.vec': '61dd9f044b7dfa56dcf1c3c07c7504c569420528',
+     'wiki.crh.vec': 'c0d2310a1207fcacc94b25b149420b33bf835015',
+     'wiki.hr.vec': '0c96f9af092cf8a84b03aec1426cd23921671489',
+     'wiki.cs.vec': 'f3ec1502aeee6a550d8cf784273fa62f61419a4e',
+     'wiki.da.vec': '526947dab1ffbc1465c7a766f2bca4de50676b08',
+     'wiki.dv.vec': 'e135ba97c711a021bc3317db2b95db5212c17658',
+     'wiki.nl.vec': 'd796ee27e37b7d1d464e03c265c31ab62b52533e',
+     'wiki.nds_nl.vec': '1cd96d12e78e5cd3f65ca2773a17696bda387b9f',
+     'wiki.dz.vec': '4cc1c6cf4aa4676d40a3145d5d4623569e430f89',
+     'wiki.pa.vec': '4939d0db77a5b28d7d5aab0fab4f999d93b2053e',
+     'wiki.arz.vec': '5e904087043b91f4945dd708f4230fdf51360132',
+     'wiki.eml.vec': 'de6be7a2ffdda226eec730dd54b4c614bd7f5dca',
+     'wiki.en.vec': 'c1e418f144ceb332b4328d27addf508731fa87df',
+     'wiki.myv.vec': '7de0927fd3d65677de7f770b3bd57c73b58df85d',
+     'wiki.eo.vec': 'b56998fd69f66755b722a9481a9bdaf10f62c9aa',
+     'wiki.et.vec': '64d56b66c02d5e49b1b66a85854d67d2dd9ebd41',
+     'wiki.ee.vec': 'f2212f58ec082321bc9b93873cd22702d0a64d64',
+     'wiki.ext.vec': '456c5632b13a0f136cd180ebe2dda67b83f78397',
+     'wiki.fo.vec': 'eead8ddc7bb74b12b16784723abf802bb51f844d',
+     'wiki.hif.vec': '49697cf784814d3f1a47559724028e0fc0940d36',
+     'wiki.fj.vec': 'c70fca34a7e43143600c54d7bf199b88846ac6f2',
+     'wiki.fi.vec': '91d19baae994d7e556b5b5938be2dc6013f9c706',
+     'wiki.frp.vec': '0eb70a613ccf807c7308c1f62535f0606465029d',
+     'wiki.fr.vec': 'b092229005a65d8683a4112852fe6eb8161a6917',
+     'wiki.fur.vec': 'd4a595cffa1abcdcf4229ba15277179ce5d20bc6',
+     'wiki.ff.vec': '57ea8febb24ba8ac4421ec97ed8918d44c69f42c',
+     'wiki.gag.vec': 'c82ec7a5d081f0673661824f4fc34345dee255f0',
+     'wiki.gl.vec': '8888bb8f3d70b36729b9ae479fe3765e0c083862',
+     'wiki.gan.vec': 'aeea01c2c4a7c44d6e8c31845560baf43d7afb9c',
+     'wiki.ka.vec': '8b92b73f27f9b77818211e053a33985589de7c62',
+     'wiki.de.vec': '2ed2696afe55f023b0040b238d9a47e5fedfe48b',
+     'wiki.glk.vec': '20a7759075916e10531f5b3577302353cef565cd',
+     'wiki.gom.vec': '5a1193d9e5d49d06354c14e2b7c01bea176e13f1',
+     'wiki.got.vec': 'cc5aaf4c305f4f1f788b4829e644823f8495a23a',
+     'wiki.el.vec': '6f034271390feaa6f9d7d16f933ddef637755979',
+     'wiki.kl.vec': '390406cc33e02f86cfaf7ae273193679924f7413',
+     'wiki.gn.vec': '98594af7897c5a1f35885ddecc77556a7e7ae981',
+     'wiki.gu.vec': 'f9e13452eb63d92bea44c7c3db8fba9945c7000e',
+     'wiki.ht.vec': '5039dfb58a074ac046813f2dae81159be8c5213f',
+     'wiki.hak.vec': '9e83512d34c7f81739492bf0abbb25ff1ef88573',
+     'wiki.ha.vec': '677a24efeeb1bcb8c0a931407775f18b18e875ae',
+     'wiki.haw.vec': 'c23a50529dc010401c99833c8f990c1b27843db3',
+     'wiki.he.vec': '55534560247394669e3f5c169136770c93bc2708',
+     'wiki.hz.vec': '7605e06dd708920f73a80473816a8d684c116bd8',
+     'wiki.mrj.vec': 'aa1c1ecba1ffd6b42c8d9659a8a04ab328ae1650',
+     'wiki.hi.vec': '8049bb8604bc049d48bd934e27b0e184c480a413',
+     'wiki.ho.vec': 'ef6b84d508d4d0a4c4cf90facaca1eebe62b2187',
+     'wiki.hu.vec': 'cd777e9efca3d4bd97c89f01690cfa4840d9c46f',
+     'wiki.is.vec': 'ae0b018f92b3e218f2dacb2045a8f0a0446788a5',
+     'wiki.io.vec': 'af0c480c5872bff31d82e767c1116da2a6be0c00',
+     'wiki.ig.vec': 'd2d1643b4fb1a18a4d002cf2969073f7f201b3b2',
+     'wiki.ilo.vec': 'c0e43835a3f4e0033ea5d7c6ff189982b2f26a05',
+     'wiki.id.vec': 'c49d5c9bec89114599427f6c12a5bda2e5523dfd',
+     'wiki.ia.vec': '2a348dc924638efc20c34785852b0837364aed76',
+     'wiki.ie.vec': '01b0d11c0e7397418e73853d220e97bdcf7a8961',
+     'wiki.iu.vec': 'ed77a1d7b0faeeb1352b1c4fc1e69971e1e21174',
+     'wiki.ik.vec': '4d5d4f7a6426720e07d0faeb51b5babfa4acf44a',
+     'wiki.ga.vec': 'caaa5b2167a499893313ac1aa38416a6a0fe9a24',
+     'wiki.it.vec': 'ac4a985e85ffae48047034e2603d804bf126caa9',
+     'wiki.jam.vec': '6d51e384c56330097c2531fdbf4e74418909e388',
+     'wiki.ja.vec': '7a2b1af1e46d795410692a002e40fa3085135f69',
+     'wiki.jv.vec': '2ff7927d3ff04b8208133497b3778ede00ea463f',
+     'wiki.kbd.vec': 'f5b8dbe47a7fae702232b5680b070ef6e865539e',
+     'wiki.kab.vec': 'e3b73d41267d8d4cd42f6cc5a0c05dc4e021bf74',
+     'wiki.xal.vec': 'b738222d84cb8c8fdb2b30a7219aa5d3bdc2f61c',
+     'wiki.kn.vec': '32763f4f860f0d081f3aabf3e7d17b7858e7d877',
+     'wiki.kr.vec': 'c919463e96e4fe36dd5bd73be0c5cd144d4d4f91',
+     'wiki.pam.vec': '8fbd31e70d0ca0c61eb1a152efaa8ecb29180967',
+     'wiki.krc.vec': '0c6ef043d51e5f337a309804f1db180fa0bb2cb8',
+     'wiki.kaa.vec': 'd990d3b9bd511d2d630f923099a6b9110231b2ed',
+     'wiki.ks.vec': 'f0a69830a3f661c107503772cc6bd5e345f0c8d6',
+     'wiki.csb.vec': '649cb2692f08414987c875dc331022567d367497',
+     'wiki.kk.vec': '6343b2b31bad2e13d03a110b91c38fab4adc01cd',
+     'wiki.km.vec': '64f7fff1df90b1f7241b232e901f76223a3719e0',
+     'wiki.ki.vec': 'c4e373e2ea13f7fa1e95b0733365e4b3fc8b2cc8',
+     'wiki.rw.vec': 'af2ec410da6519a86ba21004c8b4c7fde768a91c',
+     'wiki.ky.vec': '13b0ae3f23822317a0243bd9182105c631c834b3',
+     'wiki.rn.vec': '9df628e8c25d928d3e9d127b624f79fd99ff8f4e',
+     'wiki.kv.vec': '164dc44d701b9d606a45f0b0446076adc3858dca',
+     'wiki.koi.vec': '4001f0617fe0fdd3b22116b304f497b7b16c6e4c',
+     'wiki.kg.vec': '379575f4c6e1c4b73e311ddf01b7a85afd047d7c',
+     'wiki.ko.vec': '042c85a788c2778cca538cf716b8a78f0d7fa823',
+     'wiki.kj.vec': 'adf29c1a3aa5dd53d85e04d68aa11a26c0eaf6c8',
+     'wiki.ku.vec': '4d3a2401527dd9ba6be2b0cd31f6cd3edebadce9',
+     'wiki.ckb.vec': 'adb2fef309f1d93f429442b9c16c1564192c58f3',
+     'wiki.lad.vec': 'c510e520cde97050bf1cbeb36f2b90e6348ceed4',
+     'wiki.lbe.vec': 'e72e5ea054334580f72fbe446a726d2b4962851d',
+     'wiki.lo.vec': '7c83f82b80c49b8eab21f62ecdb3681b8bda40a6',
+     'wiki.ltg.vec': 'ec2f13d1290bd54afcaa74569e66e43e9bfef264',
+     'wiki.la.vec': '9ea6286a0581084533db8d6ee96e0b7d15166543',
+     'wiki.lv.vec': 'ef6b549f96e22718f513d47a611d3d6bc001a164',
+     'wiki.lez.vec': '8e579b984a500ad89fc66767bfd7319766bd669b',
+     'wiki.lij.vec': '4ff5bb405c820e4119f0636efc301da15a08c00a',
+     'wiki.li.vec': '0fb9ec4ac93676d8ef651692062bc3d7f6ae0843',
+     'wiki.ln.vec': '70b6a286b42958e25cb80824e0d8f1aee2de6dde',
+     'wiki.lt.vec': '58d3ebef24e5e31be1a8318b45c08ebb16ad775a',
+     'wiki.olo.vec': 'cbadb4cada4dc579d0becdac93dfb479d76bf6c8',
+     'wiki.jbo.vec': 'c90481946aa4b6b304528292612ae620f6549f3e',
+     'wiki.lmo.vec': 'a89414d9ceee4823622258f18936f67faf7e06e7',
+     'wiki.nds.vec': '7bf293149c08226e05bcf0442ac6e601162b9ffd',
+     'wiki.dsb.vec': 'e49a647a441fbf011ac5411dd6005e8725b9a65d',
+     'wiki.lg.vec': 'b096f5248dfbb343dc4696c97ea253510e1c4ef9',
+     'wiki.lb.vec': 'b146f23628c84e64314a35a5b6cc65a33777e22d',
+     'wiki.mk.vec': '85a3d3f13fa88ffde023d2326c65bdded4983dff',
+     'wiki.mai.vec': '7f513ff36e485b19f91f83b30c32dd82e9e497f6',
+     'wiki.mg.vec': '0808252740909d6129f672584311263e7b2adadc',
+     'wiki.ms.vec': '458e1a079799a54cdc0a7b78c7fa1729d2683a6d',
+     'wiki.ml.vec': '2b70fe76e8cf199a18551de782784a21e8db0b66',
+     'wiki.mt.vec': '81f4c1d84dd4cc4276d59cb903fcc9aba46be981',
+     'wiki.gv.vec': '993a7ee31bdacc91763dad656aa6c2947b873473',
+     'wiki.mi.vec': 'e8acf9c7c2ab840a192c563aa776201a88e4ca89',
+     'wiki.mr.vec': '2cd6cf88bfdfb24850d345749ce0cfea8d65829e',
+     'wiki.mh.vec': '8c5dbbcb8ad08b9c8b39151fa56d553d116d1b5a',
+     'wiki.mzn.vec': 'aefad49237808acab99e1ca8eeaaf531666f261d',
+     'wiki.mhr.vec': '39f62e292336cabc364f0d1913540b881b406393',
+     'wiki.cdo.vec': '95e8196bf76323dbabab1b8a49ba4d677af3ccea',
+     'wiki.zh_min_nan.vec': 'f91ccb013e200bb7ed560082ddf4bdd9c2f315bb',
+     'wiki.min.vec': '3bb0fa596cf27a1d165c55684bebdc8d40cb8ad7',
+     'wiki.xmf.vec': 'dc1923cfd1a7002d5d60426b60e6756854ab4a14',
+     'wiki.mwl.vec': '3d10a218242b94fcc3981aa3beb012b701827a55',
+     'wiki.mdf.vec': 'b16099ce0283a241339716eac41cfd99fdea7f36',
+     'wiki.mo.vec': '9824ebe366bc52d84e66d1c0cc72b5f7ebb46110',
+     'wiki.mn.vec': '7cef7ecdf9d98484d9b598b25d0e717dba6acfd9',
+     'wiki.mus.vec': 'bb94534fdeee4df77ae3e27c252c8874f69a307d',
+     'wiki.nah.vec': 'c52e01cf4479fb7ec91ef39f298e8f97aeb6496e',
+     'wiki.na.vec': 'fbe1444b21e1a5885a619cf2a8607fcefca3c8db',
+     'wiki.nv.vec': 'f5a6ea213bfe95c82cb22b53b4965df8b67ffeab',
+     'wiki.ng.vec': '8577634e236133980243be0a6fb3c02ad2bb5290',
+     'wiki.nap.vec': '6c9bd8ce1e85ee679b25189fd6f6d36afb119b6c',
+     'wiki.ne.vec': '1045d7876f947cd4602d9ca79f7c4323a5d3a52d',
+     'wiki.new.vec': '51f6c0b4ef1aee9fad4ab1cb69a7479db35e39a5',
+     'wiki.pih.vec': 'a6a867cef441a06c926205daa9e405aaf58e8f63',
+     'wiki.nrm.vec': 'b4cb941b126b26fa045c5fc75a490a31a969101c',
+     'wiki.frr.vec': 'cde62af939cb2de35e341cef2c74813802a58ed4',
+     'wiki.lrc.vec': 'c1ae4fb79a19d44bfe8f601f0a30fbec841fa612',
+     'wiki.se.vec': 'f46b35ee6b893c2f12dd1b929bbc2b8120cbcd8d',
+     'wiki.nso.vec': 'a906271509c2b343df35d1471509492bbfa883aa',
+     'wiki.no.vec': 'd52e8019d7cc48569c8c3b514d2b1bd10261b5c0',
+     'wiki.nn.vec': '35aeab89ffeca0377accbbd3bf18b81913c75448',
+     'wiki.nov.vec': '5455c6e8463b1c43dd073e3e177702fb9a1dd834',
+     'wiki.ii.vec': '755a6b8ffa664e342c2ab72847af343c47f46c70',
+     'wiki.oc.vec': 'cc1833492899d75571148c2c305591f53d63f0b1',
+     'wiki.cu.vec': 'e8eb72eb7fbc224b62ed32dbd897c8c7f6cc5c0a',
+     'wiki.or.vec': 'a6b120fe536b6c0133b077dca0043c3bc97eef0b',
+     'wiki.om.vec': '91789a8d9f9284f7e71e4bb8d9a60eae4af4adca',
+     'wiki.os.vec': '791b26cc300e9a1f0a08c7b2213a264e41ce30d6',
+     'wiki.pfl.vec': '0ad9b7f3ae13f909f12835107432fee4c4ed3031',
+     'wiki.pi.vec': '07a5d05e5363e8b8b132220a71de4bdc0a623cfc',
+     'wiki.pag.vec': '03f71faf060c4eb33802275279967349c0337553',
+     'wiki.pap.vec': '8cd98267cc55a4f9de80212e29651ddf7a9e83fd',
+     'wiki.ps.vec': '64f1bec5d5b937289199ceae2e1da6557ce48852',
+     'wiki.pdc.vec': '401e24d0fb9b0ae9e06a5c700684361f58727fcf',
+     'wiki.fa.vec': '09b6cc685c895c66b853af9617787d3ab0891e2c',
+     'wiki.pcd.vec': 'd2e8e7321b6f1bce94c563cb8ef8af2b45cc3e48',
+     'wiki.pms.vec': 'e30bda8d33d61db43243c157b9ac2feeaff316c8',
+     'wiki.pl.vec': 'd031adb6f83eda0364a861dcbf5ef779b5951c0b',
+     'wiki.pnt.vec': 'a9efbf962a895e1d08dde5fd56797dd03abb421e',
+     'wiki.pt.vec': '7f11ebdb0cbf5929b38319f1e977d2c13bcd741b',
+     'wiki.qu.vec': '58de8c8290e8bc8f2a6a677312e28457113437b2',
+     'wiki.ksh.vec': '4c3bb4f12073532b6fb7cc6c2be5e53319ef5b65',
+     'wiki.rmy.vec': '309fb92222b03f3bd4f2260c02bbd1e3f3d3aba7',
+     'wiki.ro.vec': 'c088ea2752d5ec8b42e32410c191a14839ae8a1f',
+     'wiki.rm.vec': '5d3144b47a0dd98648a6df0636384ab2a010ad7b',
+     'wiki.ru.vec': '7514a2c60ee4118abb451ed32a0d61cb52dec384',
+     'wiki.rue.vec': 'fe539e0ea0bbbfd3ee06bd0c5521a035c7361ec5',
+     'wiki.sah.vec': '202470467194a1cbdcd571b14ef68371a29b38d9',
+     'wiki.sm.vec': '88c2c57ca483626b052403418cb4372d72352bc9',
+     'wiki.bat_smg.vec': 'cb3aef58da2011183b39fca64cabf3d9d7a62f4b',
+     'wiki.sg.vec': '7b9c8294c060bd10839650afd1f247b950aa819d',
+     'wiki.sa.vec': '7fed78d1d7674453b9876ee99aeeeba85ea46699',
+     'wiki.sc.vec': 'dba8dc7754ef04b1ba0cd702d94eea9575cde91c',
+     'wiki.stq.vec': '1bf88af29f1d86cac16042a5bea6b1651c96a8c1',
+     'wiki.sco.vec': '4625a5ad90a57f994be9b3aa4f8f3ecda941a821',
+     'wiki.gd.vec': 'f4b513598a1bf0f0d5b6521ea8ce363e9596cb97',
+     'wiki.sr.vec': '3cf09f476f55a92fdd2880f7ba336656ab232736',
+     'wiki.sh.vec': '016691ecb26ace442731d92b1265e5c6c3d8ca5f',
+     'wiki.st.vec': '963646055d12873b1c83b0eef8649ecaf473d42e',
+     'wiki.sn.vec': '8dbb1019dcc8f842a8c0f550295ae697f8e1b7e0',
+     'wiki.scn.vec': 'bde043a235551e1643506774c5d9b61ecf2fc424',
+     'wiki.szl.vec': '0573cf888ec70b459b0596d34814fe60fd69f190',
+     'wiki.simple.vec': '55267c50fbdf4e4ae0fbbda5c73830a379d68795',
+     'wiki.sd.vec': '36852d1253496e598fbd9b9009f07f454a6bea5b',
+     'wiki.si.vec': 'd05ed6a0bc1ee56e5d2e5f881d47372095f6eb0c',
+     'wiki.sk.vec': '98759aacf7352d49a51390fae02030776510ae13',
+     'wiki.sl.vec': 'b26997c0ed1de26a47b11efdc26ac1e7f189fa54',
+     'wiki.so.vec': '294756b60b03fe57cb08abd8d677d6a717b40bc8',
+     'wiki.azb.vec': 'e23af0a436b97434813c3cb14ed114cc5b352faa',
+     'wiki.es.vec': '2f41401aa0925167176bcd7a6770423d891dfef5',
+     'wiki.srn.vec': 'faee05e550f5b08809a9ae5586ac4b08c9a1c359',
+     'wiki.su.vec': '25e864495acb6d280bab0e62480f68550c9ceed4',
+     'wiki.sw.vec': '8e70d207dbbd14e60a48e260a23fbf284a8e9f06',
+     'wiki.ss.vec': '488546a3b2f88f549c50ae9f32f1997cc441b039',
+     'wiki.sv.vec': 'eab83ae36701139696477b91b6e8d292ef175053',
+     'wiki.tl.vec': 'd508e229ced7201510999e76d583de3ff2339d8b',
+     'wiki.ty.vec': 'b881f60b8c75a71864d9847a17961d368f3058fc',
+     'wiki.tg.vec': '6a5cd5bfe571ca0359b66d21bf6950553213f42d',
+     'wiki.ta.vec': 'b66b5358527b1f3a6a421ab26464a3c1e75e18af',
+     'wiki.roa_tara.vec': 'b3fcb01ff0bac53a0ba08c5c0c411f26ee83a95a',
+     'wiki.tt.vec': '913bb3a11da6f8142b3bbec3ef065162d9350f1d',
+     'wiki.te.vec': 'e71dcf3cc45da1bcdae5e431324025bd2026d0c8',
+     'wiki.tet.vec': 'f38fe0e76b9b08ff652689eeee42c4fdadd9a47e',
+     'wiki.th.vec': '1d6e0d525392a1042d017534f6c320c5a0afd345',
+     'wiki.bo.vec': '2e9358e03dcfa09da23d2e1499d84b10348fd8a9',
+     'wiki.ti.vec': 'c769fbc99bbb4138a40231e573685c7948d4a4c4',
+     'wiki.tpi.vec': '407b96d235f54f3e0be9dc23a3bab89c6593a621',
+     'wiki.to.vec': '64d512665b55e9ef9a3915e8167347be79310fa0',
+     'wiki.ts.vec': '00f8229e2f230afd388221c0f823a1de9fc0e443',
+     'wiki.tn.vec': '39f45f3fa86645bb25c54150204abcd51cc1048c',
+     'wiki.tcy.vec': '388b1d89642fcc790b688e9643b3d19e14d66f40',
+     'wiki.tum.vec': 'bfbe43364724af882a520d2edcc2ce049c7357cd',
+     'wiki.tr.vec': '13234aa1bf5f99e81d933482b3b83c3e4bf6c85e',
+     'wiki.tk.vec': '33ae577f77d339ab7a0dff88855b8d5c974d0aef',
+     'wiki.tyv.vec': 'e8f9a36dc58e4108c553f96e247a877a099ab5ba',
+     'wiki.tw.vec': 'f329b667d70d9f0b753e55e1b1579b5a5191d3bd',
+     'wiki.udm.vec': '336a8526f22e177faac69573661dc9c3ce36591f',
+     'wiki.uk.vec': '77f7737b9f88eac2b3e130ea8abb8886336fd0c6',
+     'wiki.hsb.vec': '3dc7830544c58535bed308c552d609e13b973502',
+     'wiki.ur.vec': 'cb8132102152a958df72bd3e25f1a72abb4c9c76',
+     'wiki.ug.vec': '586d2febafaf17c9187c599ffd7b96e559103c34',
+     'wiki.uz.vec': '11c3a76dae12b454f693811e33ae2e60015743e2',
+     'wiki.ve.vec': 'b7d2947501de1c30a9f8496d5efae20c051104e1',
+     'wiki.vec.vec': 'ae4b055fba21974e56beecab3a95f9dc24a62fd0',
+     'wiki.vep.vec': 'a38a781fde24f4d7b52aa8bc450b9949dd4e1808',
+     'wiki.vi.vec': 'bc84245b52b2e212e28dc6856c0693ce9845a9c5',
+     'wiki.vo.vec': 'c830988b6965bfce2f932b1be193f7d1f755f411',
+     'wiki.fiu_vro.vec': '168a71a2b1c478e6810fa5dce9612d8bf8a273dc',
+     'wiki.wa.vec': '18f9ca1a585e1d18c3630029141a2e19d7d34a8e',
+     'wiki.war.vec': '1f5d443d6f612b59a53820dd6f39fd886a6ad30f',
+     'wiki.cy.vec': '32d976a9bfc4dd6e39328c906eead0f597bd9e25',
+     'wiki.vls.vec': '07e8636908c057b9870ce4b98c7130d460cf882a',
+     'wiki.fy.vec': 'd4beef537b7ff142a3986513879ff51a9ec14a7b',
+     'wiki.pnb.vec': '35f38862d3d83012d6db7baa8a4105e3e0a416e7',
+     'wiki.wo.vec': '2ad96a7a9e640bc0dbcf316b1f414b92802dcb8e',
+     'wiki.wuu.vec': 'e1cbae1d3ad52329d0f36ada764016fbacf07049',
+     'wiki.xh.vec': 'bf37f741b0b75953281d11df2b4d80100df9e666',
+     'wiki.yi.vec': '299d61958b7dcc38774768f1489121384726d860',
+     'wiki.yo.vec': 'e35c8aff2924ba07936be9d0d94bd298f09702a4',
+     'wiki.diq.vec': '77f3c370d1d77806fafe368cf788af550ff607dd',
+     'wiki.zea.vec': 'ee12db26aab3f2b3b2745a298ef414e7aeb5a058',
+     'wiki.za.vec': 'e3a0e58bd2e5b1891c71f1f7e37ff71997a20361',
+     'wiki.zu.vec': '4b244b9697a8280e6646842c5fc81bb3a6bc8ec7'}
diff --git a/python/mxnet/contrib/text/embedding.py b/python/mxnet/contrib/text/embedding.py
new file mode 100644
index 0000000000..2996f1ea9f
--- /dev/null
+++ b/python/mxnet/contrib/text/embedding.py
@@ -0,0 +1,669 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=consider-iterating-dictionary
+
+"""Text token embeddings."""
+from __future__ import absolute_import
+from __future__ import print_function
+
+import io
+import logging
+import os
+import tarfile
+import warnings
+import zipfile
+
+from . import constants as C
+from .indexer import TokenIndexer
+from ... import ndarray as nd
+from ... import registry
+
+
+class TokenEmbedding(TokenIndexer):
+    """Token embedding base class.
+
+
+    To load token embeddings from an externally hosted pre-trained
+    token embedding file, such as those of GloVe and FastText, use
+    `TokenEmbedding.create(embedding_name, pretrained_file_name)`. To get all
+    the available `embedding_name` and `pretrained_file_name`, use
+    `TokenEmbedding.get_embedding_and_pretrained_file_names()`.
+
+    Alternatively, to load embedding vectors from a custom pre-trained token
+    embedding file, use :class:`~mxnet.text.embedding.CustomEmbedding`.
+
+    For every unknown token, if its representation `self.unknown_token` is
+    encountered in the pre-trained token embedding file, index 0 of
+    `self.idx_to_vec` maps to the pre-trained token embedding vector loaded from
+    the file; otherwise, index 0 of `self.idx_to_vec` maps to the token
+    embedding vector initialized by `init_unknown_vec`.
+
+    If a token is encountered multiple times in the pre-trained token embedding
+    file, only the first-encountered token embedding vector will be loaded and
+    the rest will be skipped.
+
+    For the same token, its index and embedding vector may vary across different
+    instances of :class:`~mxnet.text.embedding.TokenEmbedding`.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    vec_len : int
+        The length of the embedding vector for each token.
+    idx_to_vec : mxnet.ndarray.NDArray
+        For all the indexed tokens in this embedding, this NDArray maps each
+        token's index to an embedding vector. The largest valid index maps
+        to the initialized embedding vector for every reserved token, such as an
+        unknown_token token and a padding token.
+    """
+
+    def __init__(self, **kwargs):
+        super(TokenEmbedding, self).__init__(**kwargs)
+
+    @classmethod
+    def _get_download_file_name(cls, pretrained_file_name):
+        return pretrained_file_name
+
+    @classmethod
+    def _get_pretrained_file_url(cls, pretrained_file_name):
+        repo_url = os.environ.get('MXNET_GLUON_REPO', C.APACHE_REPO_URL)
+        embedding_cls = cls.__name__.lower()
+
+        url_format = '{repo_url}gluon/embeddings/{cls}/{file_name}'
+        return url_format.format(repo_url=repo_url,
+                                 cls=embedding_cls,
+                                 file_name=cls._get_download_file_name(pretrained_file_name))
+
+    @classmethod
+    def _get_pretrained_file(cls, embedding_root, pretrained_file_name):
+        from ...gluon.utils import check_sha1, download
+        embedding_cls = cls.__name__.lower()
+        embedding_root = os.path.expanduser(embedding_root)
+        url = cls._get_pretrained_file_url(pretrained_file_name)
+
+        embedding_dir = os.path.join(embedding_root, embedding_cls)
+        pretrained_file_path = os.path.join(embedding_dir, pretrained_file_name)
+        downloaded_file = os.path.basename(url)
+        downloaded_file_path = os.path.join(embedding_dir, downloaded_file)
+
+        expected_file_hash = cls.pretrained_file_name_sha1[pretrained_file_name]
+
+        if hasattr(cls, 'pretrained_archive_name_sha1'):
+            expected_downloaded_hash = \
+                cls.pretrained_archive_name_sha1[downloaded_file]
+        else:
+            expected_downloaded_hash = expected_file_hash
+
+        if not os.path.exists(pretrained_file_path) \
+           or not check_sha1(pretrained_file_path, expected_file_hash):
+            download(url, downloaded_file_path, sha1_hash=expected_downloaded_hash)
+
+            ext = os.path.splitext(downloaded_file)[1]
+            if ext == '.zip':
+                with zipfile.ZipFile(downloaded_file_path, 'r') as zf:
+                    zf.extractall(embedding_dir)
+            elif ext == '.gz':
+                with tarfile.open(downloaded_file_path, 'r:gz') as tar:
+                    tar.extractall(path=embedding_dir)
+        return pretrained_file_path
+
+    def _load_embedding(self, pretrained_file_path, elem_delim,
+                        init_unknown_vec, encoding='utf8'):
+        """Load embedding vectors from the pre-trained token embedding file.
+
+
+        For every unknown token, if its representation `self.unknown_token` is
+        encountered in the pre-trained token embedding file, index 0 of
+        `self.idx_to_vec` maps to the pre-trained token embedding vector loaded
+        from the file; otherwise, index 0 of `self.idx_to_vec` maps to the text
+        embedding vector initialized by `init_unknown_vec`.
+
+        If a token is encountered multiple times in the pre-trained text
+        embedding file, only the first-encountered token embedding vector will
+        be loaded and the rest will be skipped.
+        """
+
+        pretrained_file_path = os.path.expanduser(pretrained_file_path)
+
+        if not os.path.isfile(pretrained_file_path):
+            raise ValueError('`pretrained_file_path` must be a valid path to '
+                             'the pre-trained token embedding file.')
+
+        logging.info('Loading pre-trained token embedding vectors from %s',
+                     pretrained_file_path)
+        vec_len = None
+        all_elems = []
+        tokens = set()
+        loaded_unknown_vec = None
+        line_num = 0
+        with io.open(pretrained_file_path, 'r', encoding=encoding) as f:
+            for line in f:
+                line_num += 1
+                elems = line.rstrip().split(elem_delim)
+
+                assert len(elems) > 1, 'At line %d of the pre-trained text ' \
+                                       'embedding file: the data format of the ' \
+                                       'pre-trained token embedding file %s is ' \
+                                       'unexpected.' \
+                                       % (line_num, pretrained_file_path)
+
+                token, elems = elems[0], [float(i) for i in elems[1:]]
+
+                if token == self.unknown_token and loaded_unknown_vec is None:
+                    loaded_unknown_vec = elems
+                    tokens.add(self.unknown_token)
+                elif token in tokens:
+                    warnings.warn('At line %d of the pre-trained token embedding '
+                                  'file: the embedding vector for token %s has '
+                                  'been loaded and a duplicate embedding for the '
+                                  'same token is seen and skipped.'
+                                  % (line_num, token))
+                elif len(elems) == 1:
+                    warnings.warn('At line %d of the pre-trained text '
+                                  'embedding file: token %s with 1-dimensional '
+                                  'vector %s is likely a header and is '
+                                  'skipped.' % (line_num, token, elems))
+                else:
+                    if vec_len is None:
+                        vec_len = len(elems)
+                        # Reserve a vector slot for the unknown token at the
+                        # very beginning because the unknown index is 0.
+                        all_elems.extend([0] * vec_len)
+                    else:
+                        assert len(elems) == vec_len, \
+                            'At line %d of the pre-trained token embedding ' \
+                            'file: the dimension of token %s is %d but the ' \
+                            'dimension of previous tokens is %d. Dimensions ' \
+                            'of all the tokens must be the same.' \
+                            % (line_num, token, len(elems), vec_len)
+                    all_elems.extend(elems)
+                    self._idx_to_token.append(token)
+                    self._token_to_idx[token] = len(self._idx_to_token) - 1
+                    tokens.add(token)
+
+        self._vec_len = vec_len
+        self._idx_to_vec = nd.array(all_elems).reshape((-1, self.vec_len))
+
+        if loaded_unknown_vec is None:
+            self._idx_to_vec[C.UNKNOWN_IDX] = init_unknown_vec(
+                shape=self.vec_len)
+        else:
+            self._idx_to_vec[C.UNKNOWN_IDX] = nd.array(loaded_unknown_vec)
+
+    @property
+    def vec_len(self):
+        return self._vec_len
+
+    @property
+    def idx_to_vec(self):
+        return self._idx_to_vec
+
+    def get_vecs_by_tokens(self, tokens, lower_case_backup=False):
+        """Look up embedding vectors of tokens.
+
+
+        Parameters
+        ----------
+        tokens : str or list of strs
+            A token or a list of tokens.
+        lower_case_backup : bool, default False
+            If False, each token in the original case will be looked up; if
+            True, each token in the original case will be looked up first, if
+            not found in the keys of the property `token_to_idx`, the token
+            in the lower case will be looked up.
+
+
+        Returns
+        -------
+        mxnet.ndarray.NDArray:
+            The embedding vector(s) of the token(s). According to numpy
+            conventions, if `tokens` is a string, returns a 1-D NDArray of shape
+            `self.vec_len`; if `tokens` is a list of strings, returns a 2-D
+            NDArray of shape=(len(tokens), self.vec_len).
+        """
+
+        to_reduce = False
+        if not isinstance(tokens, list):
+            tokens = [tokens]
+            to_reduce = True
+
+        if not lower_case_backup:
+            indices = [self.token_to_idx.get(token, C.UNKNOWN_IDX)
+                       for token in tokens]
+        else:
+            indices = [self.token_to_idx[token] if token in self.token_to_idx
+                       else self.token_to_idx.get(token.lower(), C.UNKNOWN_IDX)
+                       for token in tokens]
+
+        vecs = nd.Embedding(nd.array(indices), self.idx_to_vec,
+                            self.idx_to_vec.shape[0], self.idx_to_vec.shape[1])
+
+        return vecs[0] if to_reduce else vecs
+
+    def update_token_vectors(self, tokens, new_vectors):
+        """Updates embedding vectors for tokens.
+
+
+        Parameters
+        ----------
+        tokens : str or a list of strs
+            A token or a list of tokens whose embedding vector are to be
+            updated.
+        new_vectors : mxnet.ndarray.NDArray
+            An NDArray to be assigned to the embedding vectors of `tokens`.
+            Its length must be equal to the number of `tokens` and its width
+            must be equal to the dimension of embeddings of the glossary. If
+            `tokens` is a singleton, it must be 1-D or 2-D. If `tokens` is a
+            list of multiple strings, it must be 2-D.
+        """
+
+        assert self.idx_to_vec is not None, \
+            'The property `idx_to_vec` has not been properly set.'
+
+        if not isinstance(tokens, list) or len(tokens) == 1:
+            assert isinstance(new_vectors, nd.NDArray) and \
+                len(new_vectors.shape) in [1, 2], \
+                '`new_vectors` must be a 1-D or 2-D NDArray if `tokens` is a ' \
+                'singleton.'
+            if not isinstance(tokens, list):
+                tokens = [tokens]
+            if len(new_vectors.shape) == 1:
+                new_vectors = new_vectors.expand_dims(0)
+
+        else:
+            assert isinstance(new_vectors, nd.NDArray) and \
+                len(new_vectors.shape) == 2, \
+                '`new_vectors` must be a 2-D NDArray if `tokens` is a list ' \
+                'of multiple strings.'
+        assert new_vectors.shape == (len(tokens), self.vec_len), \
+            'The length of new_vectors must be equal to the number of tokens ' \
+            'and the width of new_vectors must be equal to the dimension of ' \
+            'embeddings of the glossary.'
+
+        indices = []
+        for token in tokens:
+            if token in self.token_to_idx:
+                indices.append(self.token_to_idx[token])
+            else:
+                raise ValueError('Token %s is unknown. To update the embedding '
+                                 'vector for an unknown token, please specify '
+                                 'it explicitly as the `unknown_token` %s in '
+                                 '`tokens`. This is to avoid unintended '
+                                 'updates.' %
+                                 (token, self.idx_to_token[C.UNKNOWN_IDX]))
+
+        self._idx_to_vec[nd.array(indices)] = new_vectors
+
+    @staticmethod
+    def register(embedding_cls):
+        """Registers a new token embedding.
+
+
+        Once an embedding is registered, we can create an instance of this
+        embedding with :func:`~mxnet.text.embedding.TokenEmbedding.create`.
+
+
+        Examples
+        --------
+        >>> @mxnet.text.embedding.TokenEmbedding.register
+        ... class MyTokenEmbed(mxnet.text.embedding.TokenEmbedding):
+        ...     def __init__(self, pretrained_file_name='my_pretrain_file'):
+        ...         pass
+        >>> embed = mxnet.text.embedding.TokenEmbedding.create('MyTokenEmbed')
+        >>> print(type(embed))
+        <class '__main__.MyTokenEmbed'>
+        """
+
+        register_text_embedding = registry.get_register_func(
+            TokenEmbedding, 'token embedding')
+        return register_text_embedding(embedding_cls)
+
+    @staticmethod
+    def create(embedding_name, **kwargs):
+        """Creates an instance of :class:`~mxnet.text.embedding.TokenEmbedding`.
+
+
+        Creates a token embedding instance by loading embedding vectors from an
+        externally hosted pre-trained token embedding file, such as those
+        of GloVe and FastText. To get all the valid `embedding_name` and
+        `pretrained_file_name`, use `mxnet.text.embedding.TokenEmbedding.
+        get_embedding_and_pretrained_file_names()`.
+
+
+        Parameters
+        ----------
+        embedding_name : str
+            The token embedding name (case-insensitive).
+
+
+        Returns
+        -------
+        :class:`~mxnet.text.glossary.TokenEmbedding`:
+            A token embedding instance that loads embedding vectors from an
+            externally hosted pre-trained token embedding file.
+        """
+
+        create_text_embedding = registry.get_create_func(
+            TokenEmbedding, 'token embedding')
+        return create_text_embedding(embedding_name, **kwargs)
+
+    @classmethod
+    def _check_pretrained_file_names(cls, pretrained_file_name):
+        """Checks if a pre-trained token embedding file name is valid.
+
+
+        Parameters
+        ----------
+        pretrained_file_name : str
+            The pre-trained token embedding file.
+        """
+
+        embedding_name = cls.__name__.lower()
+        if pretrained_file_name not in cls.pretrained_file_name_sha1:
+            raise KeyError('Cannot find pretrained file %s for token embedding '
+                           '%s. Valid pretrained files for embedding %s: %s' %
+                           (pretrained_file_name, embedding_name,
+                            embedding_name,
+                            ', '.join(cls.pretrained_file_name_sha1.keys())))
+
+    @staticmethod
+    def get_embedding_and_pretrained_file_names(embedding_name=None):
+        """Get valid token embedding names and their pre-trained file names.
+
+
+        To load token embedding vectors from an externally hosted pre-trained
+        token embedding file, such as those of GloVe and FastText, one should
+        use `mxnet.text.embedding.TokenEmbedding.create(embedding_name,
+        pretrained_file_name)`. This method returns all the valid names of
+        `pretrained_file_name` for the specified `embedding_name`. If
+        `embedding_name` is set to None, this method returns all the valid names
+        of `embedding_name` with associated `pretrained_file_name`.
+
+
+        Parameters
+        ----------
+        embedding_name : str or None, default None
+            The pre-trained token embedding name.
+
+
+        Returns
+        -------
+        dict or list:
+            A list of all the valid pre-trained token embedding file names
+            (`pretrained_file_name`) for the specified token embedding name
+            (`embedding_name`). If the text embedding name is set to None,
+            returns a dict mapping each valid token embedding name to a list
+            of valid pre-trained files (`pretrained_file_name`). They can be
+            plugged into `mxnet.text.embedding.TokenEmbedding.create(
+            embedding_name, pretrained_file_name)`.
+        """
+
+        text_embedding_reg = registry.get_registry(TokenEmbedding)
+
+        if embedding_name is not None:
+            if embedding_name not in text_embedding_reg:
+                raise KeyError('Cannot find `embedding_name` %s. Use '
+                               '`get_embedding_and_pretrained_file_names('
+                               'embedding_name=None).keys()` to get all the '
+                               'valid embedding names.' % embedding_name)
+            return list(text_embedding_reg[
+                embedding_name].pretrained_file_name_sha1.keys())
+        else:
+            return {embedding_name: list(
+                embedding_cls.pretrained_file_name_sha1.keys())
+                    for embedding_name, embedding_cls in
+                    registry.get_registry(TokenEmbedding).items()}
+
+
+@TokenEmbedding.register
+class GloVe(TokenEmbedding):
+    """The GloVe word embedding.
+
+
+    GloVe is an unsupervised learning algorithm for obtaining vector
+    representations for words. Training is performed on aggregated global
+    word-word co-occurrence statistics from a corpus, and the resulting
+    representations showcase interesting linear substructures of the word vector
+    space. (Source from https://nlp.stanford.edu/projects/glove/)
+
+    Reference:
+
+    GloVe: Global Vectors for Word Representation.
+    Jeffrey Pennington, Richard Socher, and Christopher D. Manning.
+    https://nlp.stanford.edu/pubs/glove.pdf
+
+    Website:
+
+    https://nlp.stanford.edu/projects/glove/
+
+    To get the updated URLs to the externally hosted pre-trained token embedding
+    files, visit https://nlp.stanford.edu/projects/glove/
+
+    License for pre-trained embeddings:
+
+        https://opendatacommons.org/licenses/pddl/
+
+
+    Parameters
+    ----------
+    pretrained_file_name : str, default 'glove.840B.300d.txt'
+        The name of the pre-trained token embedding file.
+    embedding_root : str, default os.path.join('~', '.mxnet', 'embeddings')
+        The root directory for storing embedding-related files.
+    init_unknown_vec : callback
+        The callback used to initialize the embedding vector for the unknown
+        token.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    vec_len : int
+        The length of the embedding vector for each token.
+    idx_to_vec : mxnet.ndarray.NDArray
+        For all the indexed tokens in this embedding, this NDArray maps each
+        token's index to an embedding vector. The largest valid index maps
+        to the initialized embedding vector for every reserved token, such as an
+        unknown_token token and a padding token.
+    """
+
+    # Map a pre-trained token embedding archive file and its SHA-1 hash.
+    pretrained_archive_name_sha1 = C.GLOVE_PRETRAINED_FILE_SHA1
+
+    # Map a pre-trained token embedding file and its SHA-1 hash.
+    pretrained_file_name_sha1 = C.GLOVE_PRETRAINED_ARCHIVE_SHA1
+
+    @classmethod
+    def _get_download_file_name(cls, pretrained_file_name):
+        # Map a pretrained embedding file to its archive to download.
+        src_archive = {archive.split('.')[1]: archive for archive in
+                       GloVe.pretrained_archive_name_sha1.keys()}
+        archive = src_archive[pretrained_file_name.split('.')[1]]
+        return archive
+
+    def __init__(self, pretrained_file_name='glove.840B.300d.txt',
+                 embedding_root=os.path.join('~', '.mxnet', 'embeddings'),
+                 init_unknown_vec=nd.zeros, **kwargs):
+        GloVe._check_pretrained_file_names(pretrained_file_name)
+
+        super(GloVe, self).__init__(**kwargs)
+        pretrained_file_path = GloVe._get_pretrained_file(embedding_root,
+                                                          pretrained_file_name)
+
+        self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)
+
+
+@TokenEmbedding.register
+class FastText(TokenEmbedding):
+    """The fastText word embedding.
+
+
+    FastText is an open-source, free, lightweight library that allows users to
+    learn text representations and text classifiers. It works on standard,
+    generic hardware. Models can later be reduced in size to even fit on mobile
+    devices. (Source from https://fasttext.cc/)
+
+    References:
+
+    Enriching Word Vectors with Subword Information.
+    Piotr Bojanowski, Edouard Grave, Armand Joulin, and Tomas Mikolov.
+    https://arxiv.org/abs/1607.04606
+
+    Bag of Tricks for Efficient Text Classification.
+    Armand Joulin, Edouard Grave, Piotr Bojanowski, and Tomas Mikolov.
+    https://arxiv.org/abs/1607.01759
+
+    FastText.zip: Compressing text classification models.
+    Armand Joulin, Edouard Grave, Piotr Bojanowski, Matthijs Douze, Herve Jegou,
+    and Tomas Mikolov.
+    https://arxiv.org/abs/1612.03651
+
+    Website:
+
+    https://fasttext.cc/
+
+    To get the updated URLs to the externally hosted pre-trained token embedding
+    files, visit
+    https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
+
+    License for pre-trained embeddings:
+
+        https://creativecommons.org/licenses/by-sa/3.0/
+
+
+    Parameters
+    ----------
+    pretrained_file_name : str, default 'wiki.simple.vec'
+        The name of the pre-trained token embedding file.
+    embedding_root : str, default os.path.join('~', '.mxnet', 'embeddings')
+        The root directory for storing embedding-related files.
+    init_unknown_vec : callback
+        The callback used to initialize the embedding vector for the unknown
+        token.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    vec_len : int
+        The length of the embedding vector for each token.
+    idx_to_vec : mxnet.ndarray.NDArray
+        For all the indexed tokens in this embedding, this NDArray maps each
+        token's index to an embedding vector. The largest valid index maps
+        to the initialized embedding vector for every reserved token, such as an
+        unknown_token token and a padding token.
+    """
+
+    # Map a pre-trained token embedding file and its SHA-1 hash.
+    pretrained_file_name_sha1 = C.FAST_TEXT_FILE_SHA1
+
+    def __init__(self, pretrained_file_name='wiki.simple.vec',
+                 embedding_root=os.path.join('~', '.mxnet', 'embeddings'),
+                 init_unknown_vec=nd.zeros, **kwargs):
+        FastText._check_pretrained_file_names(pretrained_file_name)
+
+        super(FastText, self).__init__(**kwargs)
+        pretrained_file_path = FastText._get_pretrained_file(embedding_root,
+                                                             pretrained_file_name)
+
+        self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)
+
+
+class CustomEmbedding(TokenEmbedding):
+    """User-defined token embedding.
+
+    This is to load embedding vectors from a user-defined pre-trained text
+    embedding file.
+
+    Denote by '<ed>' the argument `elem_delim`. Denote by <v_ij> the j-th
+    element of the token embedding vector for <token_i>, the expected format of
+    a custom pre-trained token embedding file is:
+
+    '<token_1><ed><v_11><ed><v_12><ed>...<ed><v_1k>\\\\n<token_2><ed><v_21><ed>
+    <v_22><ed>...<ed><v_2k>\\\\n...'
+
+    where k is the length of the embedding vector `vec_len`.
+
+
+    Parameters
+    ----------
+    pretrained_file_path : str
+        The path to the custom pre-trained token embedding file.
+    elem_delim : str, default ' '
+        The delimiter for splitting a token and every embedding vector element
+        value on the same line of the custom pre-trained token embedding file.
+    init_unknown_vec : callback
+        The callback used to initialize the embedding vector for the unknown
+        token.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    vec_len : int
+        The length of the embedding vector for each token.
+    idx_to_vec : mxnet.ndarray.NDArray
+        For all the indexed tokens in this embedding, this NDArray maps each
+        token's index to an embedding vector. The largest valid index maps
+        to the initialized embedding vector for every reserved token, such as
+        the unknown token and a padding token.
+    """
+
+    def __init__(self, pretrained_file_path, elem_delim=' ', encoding='utf8',
+                 init_unknown_vec=nd.zeros, **kwargs):
+        super(CustomEmbedding, self).__init__(**kwargs)
+        self._load_embedding(pretrained_file_path, elem_delim, init_unknown_vec,
+                             encoding)
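
For illustration (not part of the diff): constructing a CustomEmbedding from a small
hypothetical file in the format described above; unknown tokens map to the vector
produced by init_unknown_vec.

    from mxnet.contrib import text

    # 'my_embed.txt' (hypothetical) contains one token and its vector per line:
    #   hello 0.1 0.2 0.3
    #   world 0.4 0.5 0.6
    embed = text.embedding.CustomEmbedding('my_embed.txt', elem_delim=' ')
    print(embed.vec_len)                                # 3
    print(embed.get_vecs_by_tokens(['hello', 'oov']))   # 'oov' gets the unknown vector
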
diff --git a/python/mxnet/contrib/text/glossary.py b/python/mxnet/contrib/text/glossary.py
new file mode 100644
index 0000000000..4de082b5f8
--- /dev/null
+++ b/python/mxnet/contrib/text/glossary.py
@@ -0,0 +1,142 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+"""Index text tokens and load their embeddings."""
+from __future__ import absolute_import
+from __future__ import print_function
+
+from ... import ndarray as nd
+from .embedding import TokenEmbedding
+
+
+class Glossary(TokenEmbedding):
+    """Indexing and embedding for text tokens in a glossary.
+
+
+    For each indexed token in a glossary, an embedding vector will be associated
+    with it. Such embedding vectors can be loaded from externally hosted or
+    custom pre-trained token embedding files, such as via instances of
+    :class:`~mxnet.text.embedding.TokenEmbedding`.
+
+
+    Parameters
+    ----------
+    counter : collections.Counter or None, default None
+        Counts text token frequencies in the text data. Its keys will be indexed
+        according to frequency thresholds such as `most_freq_count` and
+        `min_freq`. Keys of `counter`, `unknown_token`, and values of
+        `reserved_tokens` must be of the same hashable type. Examples: str, int,
+        and tuple.
+    token_embeddings : instance or list of :class:`~TokenEmbedding`
+        One or multiple pre-trained token embeddings to load. If it is a list of
+        multiple embeddings, these embedding vectors will be concatenated for
+        each token.
+    most_freq_count : None or int, default None
+        The maximum possible number of the most frequent tokens in the keys of
+        `counter` that can be indexed. Note that this argument does not count
+        any token from `reserved_tokens`. If this argument is None or larger
+        than its largest possible value restricted by `counter` and
+        `reserved_tokens`, this argument becomes positive infinity.
+    min_freq : int, default 1
+        The minimum frequency required for a token in the keys of `counter` to
+        be indexed.
+    unknown_token : hashable object, default '<unk>'
+        The representation for any unknown token. In other words, any unknown
+        token will be indexed as the same representation. Keys of `counter`,
+        `unknown_token`, and values of `reserved_tokens` must be of the same
+        hashable type. Examples: str, int, and tuple.
+    reserved_tokens : list of hashable objects or None, default None
+        A list of reserved tokens that will always be indexed, such as special
+        symbols representing padding, beginning of sentence, and end of
+        sentence. It cannot contain `unknown_token`, or duplicate reserved
+        tokens. Keys of `counter`, `unknown_token`, and values of
+        `reserved_tokens` must be of the same hashable type. Examples: str, int,
+        and tuple.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    vec_len : int
+        The length of the embedding vector for each token.
+    idx_to_vec : mxnet.ndarray.NDArray
+        For all the indexed tokens in this embedding, this NDArray maps each
+        token's index to an embedding vector. The largest valid index maps
+        to the initialized embedding vector for every reserved token, such as
+        the unknown token and a padding token.
+    """
+    def __init__(self, counter, token_embeddings, most_freq_count=None,
+                 min_freq=1, unknown_token='<unk>', reserved_tokens=None):
+
+        if not isinstance(token_embeddings, list):
+            token_embeddings = [token_embeddings]
+
+        # Sanity checks.
+        for embed in token_embeddings:
+            assert isinstance(embed, TokenEmbedding), \
+                'The parameter `token_embeddings` must be an instance or a ' \
+                'list of instances of `mxnet.text.embedding.TokenEmbedding` ' \
+                'whose embedding vectors will be loaded or ' \
+                'concatenated-then-loaded to map to the indexed tokens.'
+
+        # Index tokens from keys of `counter` and reserved tokens.
+        super(Glossary, self).__init__(counter=counter,
+                                       most_freq_count=most_freq_count,
+                                       min_freq=min_freq,
+                                       unknown_token=unknown_token,
+                                       reserved_tokens=reserved_tokens)
+
+        # Set _idx_to_vec so that indices of tokens from keys of `counter` are
+        # associated with token embedding vectors from `token_embeddings`.
+        self._set_idx_to_vec_by_embeds(token_embeddings)
+
+    def _set_idx_to_vec_by_embeds(self, token_embeddings):
+        """Sets the mapping between token indices and token embedding vectors.
+
+
+        Parameters
+        ----------
+        token_embeddings : an instance or a list of instances of
+            :class:`~mxnet.text.embedding.TokenEmbedding`
+            One or multiple pre-trained token embeddings to load. If it is a
+            list of multiple embeddings, these embedding vectors will be
+            concatenated for each token.
+        """
+
+        self._vec_len = sum(embed.vec_len for embed in token_embeddings)
+        self._idx_to_vec = nd.zeros(shape=(len(self), self.vec_len))
+
+        col_start = 0
+        # Concatenate all the embedding vectors in token_embeddings.
+        for embed in token_embeddings:
+            col_end = col_start + embed.vec_len
+            # Concatenate vectors of the unknown token.
+            self._idx_to_vec[0, col_start:col_end] = embed.idx_to_vec[0]
+            self._idx_to_vec[1:, col_start:col_end] = embed.get_vecs_by_tokens(
+                self.idx_to_token[1:])
+            col_start = col_end
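
A hedged sketch (illustration only, not part of the diff): building a Glossary that
concatenates the vectors of two embeddings for every indexed token; the embedding
file name is hypothetical.

    from collections import Counter
    from mxnet.contrib import text

    counter = Counter(['hello', 'world', 'hello'])
    embed = text.embedding.CustomEmbedding('my_embed.txt')   # hypothetical file
    glossary = text.glossary.Glossary(counter, [embed, embed], min_freq=1)
    assert glossary.vec_len == 2 * embed.vec_len
    print(glossary.idx_to_vec.shape)   # (len(glossary), glossary.vec_len)
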
diff --git a/python/mxnet/contrib/text/indexer.py b/python/mxnet/contrib/text/indexer.py
new file mode 100644
index 0000000000..bed2794b29
--- /dev/null
+++ b/python/mxnet/contrib/text/indexer.py
@@ -0,0 +1,231 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=consider-iterating-dictionary
+
+"""Text token indexer."""
+from __future__ import absolute_import
+from __future__ import print_function
+
+from collections import Counter
+
+from . import constants as C
+
+
+class TokenIndexer(object):
+    """Indexing for text tokens.
+
+
+    Build indices for the unknown token, reserved tokens, and input counter
+    keys. Indexed tokens can be used by instances of
+    :class:`~mxnet.text.embedding.TokenEmbedding`, such as instances of
+    :class:`~mxnet.text.glossary.Glossary`.
+
+
+    Parameters
+    ----------
+    counter : collections.Counter or None, default None
+        Counts text token frequencies in the text data. Its keys will be indexed
+        according to frequency thresholds such as `most_freq_count` and
+        `min_freq`. Keys of `counter`, `unknown_token`, and values of
+        `reserved_tokens` must be of the same hashable type. Examples: str, int,
+        and tuple.
+    most_freq_count : None or int, default None
+        The maximum possible number of the most frequent tokens in the keys of
+        `counter` that can be indexed. Note that this argument does not count
+        any token from `reserved_tokens`. If several keys of `counter` have the
+        same frequency and indexing all of them would exceed this argument's
+        value, such keys are indexed one by one according to their __cmp__()
+        order until the frequency threshold is met. If this argument is None or
+        larger than its largest possible value restricted by `counter` and
+        `reserved_tokens`, it has no effect.
+    min_freq : int, default 1
+        The minimum frequency required for a token in the keys of `counter` to
+        be indexed.
+    unknown_token : hashable object, default '<unk>'
+        The representation for any unknown token. In other words, any unknown
+        token will be indexed as the same representation. Keys of `counter`,
+        `unknown_token`, and values of `reserved_tokens` must be of the same
+        hashable type. Examples: str, int, and tuple.
+    reserved_tokens : list of hashable objects or None, default None
+        A list of reserved tokens that will always be indexed, such as special
+        symbols representing padding, beginning of sentence, and end of
+        sentence. It cannot contain `unknown_token`, or duplicate reserved
+        tokens. Keys of `counter`, `unknown_token`, and values of
+        `reserved_tokens` must be of the same hashable type. Examples: str, int,
+        and tuple.
+
+
+    Properties
+    ----------
+    token_to_idx : dict mapping str to int
+        A dict mapping each token to its index integer.
+    idx_to_token : list of strs
+        A list of indexed tokens where the list indices and the token indices
+        are aligned.
+    unknown_token : hashable object
+        The representation for any unknown token. In other words, any
+        unknown token will be indexed as the same representation.
+    reserved_tokens : list of strs or None
+        A list of reserved tokens that will always be indexed.
+    """
+
+    def __init__(self, counter=None, most_freq_count=None, min_freq=1,
+                 unknown_token='<unk>', reserved_tokens=None):
+
+        # Sanity checks.
+        assert min_freq > 0, '`min_freq` must be set to a positive value.'
+
+        if reserved_tokens is not None:
+            reserved_token_set = set(reserved_tokens)
+            assert unknown_token not in reserved_token_set, \
+                '`reserved_tokens` cannot contain `unknown_token`.'
+            assert len(reserved_token_set) == len(reserved_tokens), \
+                '`reserved_tokens` cannot contain duplicate reserved tokens.'
+
+        self._index_unknown_and_reserved_tokens(unknown_token, reserved_tokens)
+
+        if counter is not None:
+            self._index_counter_keys(counter, unknown_token, reserved_tokens,
+                                     most_freq_count, min_freq)
+
+    def _index_unknown_and_reserved_tokens(self, unknown_token,
+                                           reserved_tokens):
+        """Indexes unknown and reserved tokens."""
+
+        self._unknown_token = unknown_token
+        # Thus, constants.UNKNOWN_IDX must be 0.
+        self._idx_to_token = [unknown_token]
+
+        if reserved_tokens is None:
+            self._reserved_tokens = None
+        else:
+            self._reserved_tokens = reserved_tokens[:]
+            self._idx_to_token.extend(reserved_tokens)
+
+        self._token_to_idx = {token: idx for idx, token in
+                              enumerate(self._idx_to_token)}
+
+    def _index_counter_keys(self, counter, unknown_token, reserved_tokens,
+                            most_freq_count, min_freq):
+        """Indexes keys of `counter`.
+
+
+        Indexes keys of `counter` according to frequency thresholds such as
+        `most_freq_count` and `min_freq`.
+        """
+
+        assert isinstance(counter, Counter), \
+            '`counter` must be an instance of collections.Counter.'
+
+        unknown_and_reserved_tokens = set(reserved_tokens) \
+            if reserved_tokens is not None else set()
+        unknown_and_reserved_tokens.add(unknown_token)
+
+        token_freqs = sorted(counter.items(), key=lambda x: x[0])
+        token_freqs.sort(key=lambda x: x[1], reverse=True)
+
+        token_cap = len(unknown_and_reserved_tokens) + (
+            len(counter) if most_freq_count is None else most_freq_count)
+
+        for token, freq in token_freqs:
+            if freq < min_freq or len(self._idx_to_token) == token_cap:
+                break
+            if token not in unknown_and_reserved_tokens:
+                self._idx_to_token.append(token)
+                self._token_to_idx[token] = len(self._idx_to_token) - 1
+
+    def __len__(self):
+        return len(self.idx_to_token)
+
+    @property
+    def token_to_idx(self):
+        return self._token_to_idx
+
+    @property
+    def idx_to_token(self):
+        return self._idx_to_token
+
+    @property
+    def unknown_token(self):
+        return self._unknown_token
+
+    @property
+    def reserved_tokens(self):
+        return self._reserved_tokens
+
+    def to_indices(self, tokens):
+        """Converts tokens to indices according to the text indexer.
+
+
+        Parameters
+        ----------
+        tokens : str or list of strs
+            A source token or tokens to be converted.
+
+
+        Returns
+        -------
+        int or list of ints
+            A token index or a list of token indices according to the text
+            indexer.
+        """
+
+        to_reduce = False
+        if not isinstance(tokens, list):
+            tokens = [tokens]
+            to_reduce = True
+
+        indices = [self.token_to_idx[token] if token in self.token_to_idx
+                   else C.UNKNOWN_IDX for token in tokens]
+
+        return indices[0] if to_reduce else indices
+
+    def to_tokens(self, indices):
+        """Converts token indices to tokens according to the text indexer.
+
+
+        Parameters
+        ----------
+        indices : int or list of ints
+            A source token index or token indices to be converted.
+
+
+        Returns
+        -------
+        str or list of strs
+            A token or a list of tokens according to the text indexer.
+        """
+
+        to_reduce = False
+        if not isinstance(indices, list):
+            indices = [indices]
+            to_reduce = True
+
+        max_idx = len(self.idx_to_token) - 1
+
+        tokens = []
+        for idx in indices:
+            if not isinstance(idx, int) or idx > max_idx:
+                raise ValueError('Token index %d in the provided `indices` is '
+                                 'invalid.' % idx)
+            else:
+                tokens.append(self.idx_to_token[idx])
+
+        return tokens[0] if to_reduce else tokens
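
Illustrative sketch (not part of the diff) of the indexing behavior implemented above:
index 0 is the unknown token, reserved tokens follow, and counter keys are then
indexed by decreasing frequency.

    from collections import Counter
    from mxnet.contrib.text.indexer import TokenIndexer

    counter = Counter(['b', 'b', 'a', 'c'])
    indexer = TokenIndexer(counter, unknown_token='<unk>', reserved_tokens=['<pad>'])
    print(indexer.to_indices(['b', 'z']))   # [2, 0]; the unseen 'z' maps to index 0
    print(indexer.to_tokens([1]))           # ['<pad>']
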
diff --git a/python/mxnet/contrib/text/utils.py b/python/mxnet/contrib/text/utils.py
new file mode 100644
index 0000000000..91e1b623ed
--- /dev/null
+++ b/python/mxnet/contrib/text/utils.py
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+"""Provide utilities for text data processing."""
+from __future__ import absolute_import
+from __future__ import print_function
+
+from collections import Counter
+import re
+
+
+def count_tokens_from_str(source_str, token_delim=' ', seq_delim='\n',
+                          to_lower=False, counter_to_update=None):
+    """Counts tokens in the specified string.
+
+    For token_delim='<td>' and seq_delim='<sd>', a specified string of two
+    sequences of tokens may look like::
+
+    <td>token1<td>token2<td>token3<td><sd><td>token4<td>token5<td><sd>
+
+
+    Parameters
+    ----------
+    source_str : str
+        A source string of tokens.
+    token_delim : str, default ' '
+        A token delimiter.
+    seq_delim : str, default '\\\\n'
+        A sequence delimiter.
+    to_lower : bool, default False
+        Whether to convert `source_str` to lower case.
+    counter_to_update : collections.Counter or None, default None
+        The collections.Counter instance to be updated with the token counts
+        of `source_str`. If None, return a new collections.Counter instance
+        counting tokens from `source_str`.
+
+
+    Returns
+    -------
+    collections.Counter
+        The `counter_to_update` collections.Counter instance after being updated
+        with the token counts of `source_str`. If `counter_to_update` is None,
+        return a new collections.Counter instance counting tokens from
+        `source_str`.
+
+
+    Examples
+    --------
+    >>> source_str = ' Life is great ! \\n life is good . \\n'
+    >>> count_tokens_from_str(source_str, ' ', '\\n', True)
+    Counter({'!': 1, '.': 1, 'good': 1, 'great': 1, 'is': 2, 'life': 2})
+    """
+
+    source_str = filter(None,
+                        re.split(token_delim + '|' + seq_delim, source_str))
+    if to_lower:
+        source_str = [t.lower() for t in source_str]
+
+    if counter_to_update is None:
+        return Counter(source_str)
+    else:
+        counter_to_update.update(source_str)
+        return counter_to_update
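
A hedged follow-up to the docstring example (illustration only): accumulating counts
across multiple source strings by passing an existing Counter as counter_to_update.

    from collections import Counter
    from mxnet.contrib.text.utils import count_tokens_from_str

    counter = Counter()
    count_tokens_from_str('Life is great !', counter_to_update=counter)
    count_tokens_from_str('life is good .', to_lower=True, counter_to_update=counter)
    print(counter['is'])   # 2
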
diff --git a/python/mxnet/gluon/contrib/__init__.py b/python/mxnet/gluon/contrib/__init__.py
index 3f8b64be9b..e06438b4f8 100644
--- a/python/mxnet/gluon/contrib/__init__.py
+++ b/python/mxnet/gluon/contrib/__init__.py
@@ -18,4 +18,6 @@
 # coding: utf-8
 """Contrib neural network module."""
 
+from . import nn
+
 from . import rnn
diff --git a/python/mxnet/gluon/contrib/nn/__init__.py b/python/mxnet/gluon/contrib/nn/__init__.py
new file mode 100644
index 0000000000..62440cda27
--- /dev/null
+++ b/python/mxnet/gluon/contrib/nn/__init__.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=wildcard-import
+"""Contrib recurrent neural network module."""
+
+from . import basic_layers
+
+from .basic_layers import *
+
+__all__ = basic_layers.__all__
diff --git a/python/mxnet/gluon/model_zoo/custom_layers.py b/python/mxnet/gluon/contrib/nn/basic_layers.py
similarity index 53%
rename from python/mxnet/gluon/model_zoo/custom_layers.py
rename to python/mxnet/gluon/contrib/nn/basic_layers.py
index 8c481b3c36..88708884c5 100644
--- a/python/mxnet/gluon/model_zoo/custom_layers.py
+++ b/python/mxnet/gluon/contrib/nn/basic_layers.py
@@ -18,52 +18,82 @@
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Custom neural network layers in model_zoo."""
-__all__ = ['HybridConcurrent', 'Identity']
+__all__ = ['Concurrent', 'HybridConcurrent', 'Identity']
 
-from ..block import Block, HybridBlock
-from ..utils import _indent
+from .... import nd
+from ...block import HybridBlock
+from ...nn import Sequential, HybridSequential
 
-class HybridConcurrent(HybridBlock):
+class Concurrent(Sequential):
+    """Lays `Block`s concurrently.
+
+    This block feeds its input to all child blocks and produces its output by
+    concatenating all the child blocks' outputs on the specified axis.
+
+    Example::
+
+        net = Concurrent()
+        # use net's name_scope to give child blocks appropriate names.
+        with net.name_scope():
+            net.add(nn.Dense(10, activation='relu'))
+            net.add(nn.Dense(20))
+            net.add(Identity())
+
+    Parameters
+    ----------
+    axis : int, default -1
+        The axis on which to concatenate the outputs.
+    """
+    def __init__(self, axis=-1, prefix=None, params=None):
+        super(Concurrent, self).__init__(prefix=prefix, params=params)
+        self.axis = axis
+
+    def forward(self, x):
+        out = []
+        for block in self._children:
+            out.append(block(x))
+        out = nd.concat(*out, dim=self.axis)
+        return out
+
+
+class HybridConcurrent(HybridSequential):
     """Lays `HybridBlock`s concurrently.
 
+    This block feeds its input to all child blocks and produces its output by
+    concatenating all the child blocks' outputs on the specified axis.
+
     Example::
 
         net = HybridConcurrent()
-        # use net's name_scope to give child Blocks appropriate names.
+        # use net's name_scope to give child blocks appropriate names.
         with net.name_scope():
             net.add(nn.Dense(10, activation='relu'))
             net.add(nn.Dense(20))
             net.add(Identity())
+
+    Parameters
+    ----------
+    axis : int, default -1
+        The axis on which to concatenate the outputs.
     """
-    def __init__(self, concat_dim, prefix=None, params=None):
+    def __init__(self, axis=-1, prefix=None, params=None):
         super(HybridConcurrent, self).__init__(prefix=prefix, params=params)
-        self.concat_dim = concat_dim
-
-    def add(self, block):
-        """Adds block on top of the stack."""
-        self.register_child(block)
+        self.axis = axis
 
     def hybrid_forward(self, F, x):
         out = []
         for block in self._children:
             out.append(block(x))
-        out = F.concat(*out, dim=self.concat_dim)
+        out = F.concat(*out, dim=self.axis)
         return out
 
-    def __repr__(self):
-        s = '{name}(\n{modstr}\n)'
-        modstr = '\n'.join(['  ({key}): {block}'.format(key=key,
-                                                        block=_indent(block.__repr__(), 2))
-                            for key, block in enumerate(self._children)
-                            if isinstance(block, Block)])
-        return s.format(name=self.__class__.__name__,
-                        modstr=modstr)
-
 
 class Identity(HybridBlock):
     """Block that passes through the input directly.
 
-    This layer is often used in conjunction with HybridConcurrent
+    This block can be used in conjunction with HybridConcurrent
     block for residual connection.
 
     Example::
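
Illustrative migration sketch (not part of the diff): HybridConcurrent moves from
gluon.model_zoo.custom_layers to gluon.contrib.nn, and its concat_dim argument is
renamed to axis.

    from mxnet.gluon import nn
    from mxnet.gluon.contrib.nn import HybridConcurrent, Identity

    net = HybridConcurrent(axis=1, prefix='')
    with net.name_scope():
        net.add(nn.Dense(10, activation='relu'))
        net.add(nn.Dense(20))
        net.add(Identity())
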
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index 435230ef53..2be43981a6 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -229,8 +229,8 @@ def __init__(self, from_sigmoid=False, weight=None, batch_axis=0, **kwargs):
     def hybrid_forward(self, F, pred, label, sample_weight=None):
         label = _reshape_like(F, label, pred)
         if not self._from_sigmoid:
-            max_val = F.relu(-pred)
-            loss = pred - pred*label + max_val + F.log(F.exp(-max_val)+F.exp(-pred-max_val))
+            # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
+            loss = F.relu(pred) - pred * label + F.Activation(-F.abs(pred), act_type='softrelu')
         else:
             loss = -(F.log(pred+1e-12)*label + F.log(1.-pred+1e-12)*(1.-label))
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
@@ -635,8 +635,8 @@ class LogisticLoss(Loss):
 
     Inputs:
         - **pred**: prediction tensor with arbitrary shape.
-        - **label**: truth tensor with values -1 or 1. Must have the same size
-          as pred.
+        - **label**: truth tensor with values -1/1 (label_format is 'signed')
+          or 0/1 (label_format is 'binary'). Must have the same size as pred.
         - **sample_weight**: element-wise weighting tensor. Must be broadcastable
           to the same shape as pred. For example, if pred has shape (64, 10)
           and you want to weigh each sample in the batch separately,
@@ -655,9 +655,10 @@ def __init__(self, weight=None, batch_axis=0, label_format='signed', **kwargs):
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
         label = _reshape_like(F, label, pred)
-        if self._label_format == 'binary':
-            label = 2 * label - 1  # Transform label to be either -1 or 1
-        loss = F.log(1.0 + F.exp(-pred * label))
+        if self._label_format == 'signed':
+            label = (label + 1.0) / 2.0  # Transform label to be either 0 or 1
+        # Use a stable formula in computation
+        loss = F.relu(pred) - pred * label + F.Activation(-F.abs(pred), act_type='softrelu')
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
         return F.mean(loss, axis=self._batch_axis, exclude=True)
 
diff --git a/python/mxnet/gluon/model_zoo/vision/densenet.py b/python/mxnet/gluon/model_zoo/vision/densenet.py
index 37a91e686d..835336739a 100644
--- a/python/mxnet/gluon/model_zoo/vision/densenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/densenet.py
@@ -25,7 +25,7 @@
 from ....context import cpu
 from ...block import HybridBlock
 from ... import nn
-from ..custom_layers import HybridConcurrent, Identity
+from ...contrib.nn import HybridConcurrent, Identity
 
 # Helpers
 def _make_dense_block(num_layers, bn_size, growth_rate, dropout, stage_index):
@@ -46,7 +46,7 @@ def _make_dense_layer(growth_rate, bn_size, dropout):
     if dropout:
         new_features.add(nn.Dropout(dropout))
 
-    out = HybridConcurrent(concat_dim=1, prefix='')
+    out = HybridConcurrent(axis=1, prefix='')
     out.add(Identity())
     out.add(new_features)
 
diff --git a/python/mxnet/gluon/model_zoo/vision/inception.py b/python/mxnet/gluon/model_zoo/vision/inception.py
index 42f0d3dd75..6d75050b83 100644
--- a/python/mxnet/gluon/model_zoo/vision/inception.py
+++ b/python/mxnet/gluon/model_zoo/vision/inception.py
@@ -25,7 +25,7 @@
 from ....context import cpu
 from ...block import HybridBlock
 from ... import nn
-from ..custom_layers import HybridConcurrent
+from ...contrib.nn import HybridConcurrent
 
 # Helpers
 def _make_basic_conv(**kwargs):
@@ -51,7 +51,7 @@ def _make_branch(use_pool, *conv_settings):
     return out
 
 def _make_A(pool_features, prefix):
-    out = HybridConcurrent(concat_dim=1, prefix=prefix)
+    out = HybridConcurrent(axis=1, prefix=prefix)
     with out.name_scope():
         out.add(_make_branch(None,
                              (64, 1, None, None)))
@@ -67,7 +67,7 @@ def _make_A(pool_features, prefix):
     return out
 
 def _make_B(prefix):
-    out = HybridConcurrent(concat_dim=1, prefix=prefix)
+    out = HybridConcurrent(axis=1, prefix=prefix)
     with out.name_scope():
         out.add(_make_branch(None,
                              (384, 3, 2, None)))
@@ -79,7 +79,7 @@ def _make_B(prefix):
     return out
 
 def _make_C(channels_7x7, prefix):
-    out = HybridConcurrent(concat_dim=1, prefix=prefix)
+    out = HybridConcurrent(axis=1, prefix=prefix)
     with out.name_scope():
         out.add(_make_branch(None,
                              (192, 1, None, None)))
@@ -98,7 +98,7 @@ def _make_C(channels_7x7, prefix):
     return out
 
 def _make_D(prefix):
-    out = HybridConcurrent(concat_dim=1, prefix=prefix)
+    out = HybridConcurrent(axis=1, prefix=prefix)
     with out.name_scope():
         out.add(_make_branch(None,
                              (192, 1, None, None),
@@ -112,7 +112,7 @@ def _make_D(prefix):
     return out
 
 def _make_E(prefix):
-    out = HybridConcurrent(concat_dim=1, prefix=prefix)
+    out = HybridConcurrent(axis=1, prefix=prefix)
     with out.name_scope():
         out.add(_make_branch(None,
                              (320, 1, None, None)))
@@ -121,7 +121,7 @@ def _make_E(prefix):
         out.add(branch_3x3)
         branch_3x3.add(_make_branch(None,
                                     (384, 1, None, None)))
-        branch_3x3_split = HybridConcurrent(concat_dim=1, prefix='')
+        branch_3x3_split = HybridConcurrent(axis=1, prefix='')
         branch_3x3_split.add(_make_branch(None,
                                           (384, (1, 3), None, (0, 1))))
         branch_3x3_split.add(_make_branch(None,
@@ -133,7 +133,7 @@ def _make_E(prefix):
         branch_3x3dbl.add(_make_branch(None,
                                        (448, 1, None, None),
                                        (384, 3, None, 1)))
-        branch_3x3dbl_split = HybridConcurrent(concat_dim=1, prefix='')
+        branch_3x3dbl_split = HybridConcurrent(axis=1, prefix='')
         branch_3x3dbl.add(branch_3x3dbl_split)
         branch_3x3dbl_split.add(_make_branch(None,
                                              (384, (1, 3), None, (0, 1))))
diff --git a/python/mxnet/gluon/model_zoo/vision/squeezenet.py b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
index 7eff10260c..09f62a5207 100644
--- a/python/mxnet/gluon/model_zoo/vision/squeezenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/squeezenet.py
@@ -25,14 +25,14 @@
 from ....context import cpu
 from ...block import HybridBlock
 from ... import nn
-from ..custom_layers import HybridConcurrent
+from ...contrib.nn import HybridConcurrent
 
 # Helpers
 def _make_fire(squeeze_channels, expand1x1_channels, expand3x3_channels):
     out = nn.HybridSequential(prefix='')
     out.add(_make_fire_conv(squeeze_channels, 1))
 
-    paths = HybridConcurrent(concat_dim=1, prefix='')
+    paths = HybridConcurrent(axis=1, prefix='')
     paths.add(_make_fire_conv(expand1x1_channels, 1))
     paths.add(_make_fire_conv(expand3x3_channels, 3, 1))
     out.add(paths)
diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore.py
index b2a4beaf93..890c9024d8 100644
--- a/python/mxnet/kvstore.py
+++ b/python/mxnet/kvstore.py
@@ -298,7 +298,8 @@ def pull(self, key, out=None, priority=0):
 
     def row_sparse_pull(self, key, out=None, priority=0, row_ids=None):
         """ Pulls a single RowSparseNDArray value or a sequence of RowSparseNDArray values \
-        from the store with specified row_ids.
+        from the store with specified row_ids. When there is only one row_id, KVStoreRowSparsePull \
+        is invoked just once and the result is broadcast to all the other outputs.
 
         `row_sparse_pull` is executed asynchronously after all previous
         `pull`/`row_sparse_pull` calls and the last `push` call for the
@@ -349,7 +350,17 @@ def row_sparse_pull(self, key, out=None, priority=0, row_ids=None):
         """
         assert(out is not None)
         assert(row_ids is not None)
-        ckeys, cvals, use_str_keys = _ctype_key_value(key, out)
+        if isinstance(row_ids, NDArray):
+            row_ids = [row_ids]
+        assert(isinstance(row_ids, list)), \
+            "row_ids should be NDArray or list of NDArray"
+        first_out = out
+        # whether row_ids are the same
+        single_rowid = False
+        if len(row_ids) == 1 and isinstance(out, list):
+            single_rowid = True
+            first_out = [out[0]]
+        ckeys, cvals, use_str_keys = _ctype_key_value(key, first_out)
         _, crow_ids, _ = _ctype_key_value(key, row_ids)
         assert(len(crow_ids) == len(cvals)), \
                "the number of row_ids doesn't match the number of values"
@@ -359,6 +370,11 @@ def row_sparse_pull(self, key, out=None, priority=0, row_ids=None):
         else:
             check_call(_LIB.MXKVStorePullRowSparse(
                 self.handle, mx_uint(len(ckeys)), ckeys, cvals, crow_ids, ctypes.c_int(priority)))
+        # the result can be copied to other devices without invoking row_sparse_pull
+        # if the indices are the same
+        if single_rowid:
+            for out_i in out[1:]:
+                out[0].copyto(out_i)
 
     def set_gradient_compression(self, compression_params):
         """ Specifies type of low-bit quantization for gradient compression \
diff --git a/python/mxnet/registry.py b/python/mxnet/registry.py
index 4a4f22fa14..4c131a1b75 100644
--- a/python/mxnet/registry.py
+++ b/python/mxnet/registry.py
@@ -29,6 +29,23 @@
 _REGISTRY = {}
 
 
+def get_registry(base_class):
+    """Get a copy of the registry.
+
+    Parameters
+    ----------
+    base_class : type
+        The base class whose registered subclasses are looked up.
+
+    Returns
+    -------
+    dict
+        A copy of the registry for `base_class`, mapping registered names to
+        their classes.
+    """
+    if base_class not in _REGISTRY:
+        _REGISTRY[base_class] = {}
+    return _REGISTRY[base_class].copy()
+
+
 def get_register_func(base_class, nickname):
     """Get registrator function.
 
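
A hedged sketch (illustration only, not part of the diff) of how get_registry pairs
with the existing get_register_func; registered names are the lowercased class names.

    from mxnet import registry

    class Base(object):
        pass

    register = registry.get_register_func(Base, 'base')

    @register
    class Foo(Base):
        pass

    print(registry.get_registry(Base))   # e.g. {'foo': <class 'Foo'>}
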
diff --git a/src/common/utils.cc b/src/common/utils.cc
index 784fcf8651..9fe46d94d0 100644
--- a/src/common/utils.cc
+++ b/src/common/utils.cc
@@ -24,6 +24,7 @@
 
 #include "./utils.h"
 #include "../operator/tensor/cast_storage-inl.h"
+#include "../operator/tensor/sparse_retain-inl.h"
 
 namespace mxnet {
 namespace common {
@@ -34,6 +35,15 @@ void CheckFormatWrapper<cpu>(const RunContext &rctx, const NDArray &input,
   CheckFormatImpl<cpu>(rctx, input, err_cpu, full_check);
 }
 
+template<>
+void SparseRetainOpForwardRspWrapper<cpu>(mshadow::Stream<cpu> *s,
+                                          const NDArray& input_nd,
+                                          const TBlob& idx_data,
+                                          const OpReqType req,
+                                          NDArray* output_nd) {
+  mxnet::op::SparseRetainOpForwardRspImpl<cpu>(s, input_nd, idx_data, req, output_nd);
+}
+
 template<>
 void CastStorageDispatch<cpu>(const OpContext& ctx,
                               const NDArray& input,
diff --git a/src/common/utils.cu b/src/common/utils.cu
index c6e2bf8138..0937d7aa51 100644
--- a/src/common/utils.cu
+++ b/src/common/utils.cu
@@ -24,6 +24,7 @@
 
 #include "./utils.h"
 #include "../operator/tensor/cast_storage-inl.h"
+#include "../operator/tensor/sparse_retain-inl.h"
 
 namespace mxnet {
 namespace common {
@@ -34,6 +35,15 @@ void CheckFormatWrapper<gpu>(const RunContext &rctx, const NDArray &input,
   CheckFormatImpl<gpu>(rctx, input, err_cpu, full_check);
 }
 
+template<>
+void SparseRetainOpForwardRspWrapper<gpu>(mshadow::Stream<gpu> *s,
+                                          const NDArray& input_nd,
+                                          const TBlob& idx_data,
+                                          const OpReqType req,
+                                          NDArray* output_nd) {
+  mxnet::op::SparseRetainOpForwardRspImpl<gpu>(s, input_nd, idx_data, req, output_nd);
+}
+
 template<>
 void CastStorageDispatch<gpu>(const OpContext& ctx,
                               const NDArray& input,
diff --git a/src/common/utils.h b/src/common/utils.h
index 6f7e452565..4bb8024ca4 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -213,7 +213,18 @@ void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
   }
 }
 
+/*! \brief Pick rows specified by user input index array from a row sparse ndarray
+ *         and save them in the output sparse ndarray.
+ */
+template<typename xpu>
+void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
+                                     const NDArray& input_nd,
+                                     const TBlob& idx_data,
+                                     const OpReqType req,
+                                     NDArray* output_nd);
 
+/* \brief Casts tensor storage type to the new type.
+ */
 template<typename xpu>
 void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);
 
diff --git a/src/kvstore/comm.h b/src/kvstore/comm.h
index 5429df70b1..d41fa64cf5 100644
--- a/src/kvstore/comm.h
+++ b/src/kvstore/comm.h
@@ -34,6 +34,7 @@
 #include "gradient_compression.h"
 #include "../ndarray/ndarray_function.h"
 #include "../operator/tensor/sparse_retain-inl.h"
+#include "./utils.h"
 namespace mxnet {
 namespace kvstore {
 /**
@@ -176,17 +177,17 @@ class CommCPU : public Comm {
         reduce[i] = buf.copy_buf[i];
         const_vars[i] = reduce[i].var();
       }
-      auto result = buf.merged;
+      NDArray result = buf.merged;
+      Resource rsc = ResourceManager::Get()->Request(result.ctx(),
+          ResourceRequest(ResourceRequest::kTempSpace));
       Engine::Get()->PushAsync(
-        [reduce, result, this](RunContext rctx, Engine::CallbackOnComplete on_complete) {
+        [reduce, result, rsc, this](RunContext rctx, Engine::CallbackOnComplete on_complete) {
           NDArray out = result;
-          Resource rsc = ResourceManager::Get()->Request(rctx.ctx,
-              ResourceRequest(ResourceRequest::kTempSpace));
           is_serial_push_?
             ReduceSumCPUExSerial(reduce, &out)
             : mxnet::ndarray::ElementwiseSum(rctx.get_stream<cpu>(), rsc, reduce, &out);
           on_complete();
-        }, Context::CPU(), const_vars, {result.var()},
+        }, Context::CPU(), const_vars, {result.var(), rsc.var},
         FnProperty::kCPUPrioritized, priority, PROFILER_MESSAGE("KVStoreReduce"));
     }
 
@@ -491,11 +492,7 @@ class CommDevice : public Comm {
 
   void Init(int key, const NDArrayStorageType stype, const TShape& shape,
             int dtype = mshadow::kFloat32) override {
-    if (stype == kDefaultStorage) {
-      sorted_key_attrs_.push_back(std::make_tuple(key, shape, dtype));
-    } else {
-      LOG(FATAL) << "storage type " << stype << " not implemented for device yet";
-    }
+    sorted_key_attrs_.emplace_back(key, shape, dtype, stype);
   }
 
   void InitBuffersAndComm(const std::vector<NDArray>& src) {
@@ -528,26 +525,42 @@ class CommDevice : public Comm {
     InitBuffersAndComm(src);
     auto& buf = merge_buf_[key];
     std::vector<NDArray> reduce(src.size());
-    CopyFromTo(src[0], &(buf.merged), priority);
-    reduce[0] = buf.merged;
 
-    if (buf.copy_buf.empty()) {
-      // TODO(mli) this results in large device memory usage for huge ndarray,
-      // such as the largest fullc in VGG. consider to do segment reduce with
-      // NDArray.Slice or gpu direct memory access. for the latter, we need to
-      // remove some ctx check, and also it reduces 20% perf
-      buf.copy_buf.resize(src.size()-1);
+    const NDArrayStorageType stype = buf.merged.storage_type();
+    if (stype == kDefaultStorage) {
+      CopyFromTo(src[0], &(buf.merged), priority);
+      reduce[0] = buf.merged;
+
+      if (buf.copy_buf.empty()) {
+        // TODO(mli) this results in large device memory usage for huge ndarray,
+        // such as the largest fullc in VGG. consider to do segment reduce with
+        // NDArray.Slice or gpu direct memory access. for the latter, we need to
+        // remove some ctx check, and also it reduces 20% perf
+        buf.copy_buf.resize(src.size()-1);
+        for (size_t i = 0; i < src.size()-1; ++i) {
+          buf.copy_buf[i] = NDArray(
+            buf.merged.shape(), buf.merged.ctx(), false, buf.merged.dtype());
+        }
+      }
       for (size_t i = 0; i < src.size()-1; ++i) {
-        buf.copy_buf[i] = NDArray(
-          buf.merged.shape(), buf.merged.ctx(), false, buf.merged.dtype());
+        CopyFromTo(src[i+1], &(buf.copy_buf[i]), priority);
+        reduce[i+1] = buf.copy_buf[i];
+      }
+    } else {
+      if (buf.copy_buf.empty()) {
+        buf.copy_buf.resize(src.size());
+        for (size_t j = 0; j < src.size(); ++j) {
+          buf.copy_buf[j] = NDArray(
+            buf.merged.storage_type(), buf.merged.shape(), buf.merged.ctx(),
+            true, buf.merged.dtype());
+        }
+      }
+      for (size_t i = 0; i < src.size(); ++i) {
+        CopyFromTo(src[i], &(buf.copy_buf[i]), priority);
+        reduce[i] = buf.copy_buf[i];
       }
     }
-    for (size_t i = 0; i < src.size()-1; ++i) {
-      CopyFromTo(src[i+1], &(buf.copy_buf[i]), priority);
-      reduce[i+1] = buf.copy_buf[i];
-    }
-
-    ElementwiseSum(reduce, &buf.merged);
+    ElementwiseSum(reduce, &buf.merged, priority);
     return buf.merged;
   }
 
@@ -621,7 +634,53 @@ class CommDevice : public Comm {
                           const std::vector<std::pair<NDArray*, NDArray>>& dst,
                           const bool use_copy,
                           const int priority) override {
-    LOG(FATAL) << "Not implemented yet";
+    CHECK_EQ(src.storage_type(), kRowSparseStorage)
+      << "BroadcastRowSparse expects row-sparse src NDArray";
+
+    for (size_t i = 0; i < dst.size(); ++i) {
+      NDArray* out = dst[i].first;
+      NDArray row_id = dst[i].second;
+      if (use_copy) {
+        CopyFromTo(src, out, priority);
+      } else {
+        CHECK_EQ(out->storage_type(), kRowSparseStorage)
+                 << "BroadcastRowSparse expects row_sparse dst NDArray";
+
+        const bool is_diff_ctx = out->ctx() != src.ctx();
+        NDArray out_gpu = is_diff_ctx? NDArray(kRowSparseStorage, out->shape(),
+            src.ctx(), true, out->dtype(), out->aux_types()) : *out;
+
+        CHECK_EQ(row_id.ctx(), src.ctx())
+                << "row_id and src are expected to be on the same context";
+
+        Engine::Get()->PushAsync([=](RunContext rctx, Engine::CallbackOnComplete on_complete) {
+            NDArray temp = out_gpu;
+            const TBlob& indices = row_id.data();
+            switch (temp.ctx().dev_mask()) {
+              case cpu::kDevMask: {
+                mxnet::common::SparseRetainOpForwardRspWrapper<cpu>(rctx.get_stream<cpu>(),
+                    src, indices, kWriteTo, &temp);
+                break;
+              }
+#if MXNET_USE_CUDA
+              case gpu::kDevMask: {
+                mxnet::common::SparseRetainOpForwardRspWrapper<gpu>(rctx.get_stream<gpu>(),
+                    src, indices, kWriteTo, &temp);
+                // wait for GPU operations to complete
+                rctx.get_stream<gpu>()->Wait();
+                break;
+              }
+#endif
+              default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+            }
+            on_complete();
+          }, out_gpu.ctx(), {src.var(), row_id.var()}, {out_gpu.var()},
+        FnProperty::kNormal, priority, PROFILER_MESSAGE("KVStoreSparseRetain"));
+        if (is_diff_ctx) {
+          CopyFromTo(out_gpu, out, priority);
+        }
+      }
+    }
   }
 
  private:
@@ -667,7 +726,7 @@ class CommDevice : public Comm {
 #endif
   }
 
-  using KeyAttrs = std::tuple<int, TShape, int>;
+  using KeyAttrs = std::tuple<int, TShape, int, NDArrayStorageType>;
   // try to allocate buff on device evenly
   void InitMergeBuffer(const std::vector<Context>& devs) {
     std::sort(sorted_key_attrs_.begin(), sorted_key_attrs_.end(), [](
@@ -680,9 +739,10 @@ class CommDevice : public Comm {
       ctx_info[d.dev_id] = std::make_pair(d, 0);
     }
     for (size_t i = 0; i < sorted_key_attrs_.size(); ++i) {
-      int key  = std::get<0>(sorted_key_attrs_[i]);
-      TShape s = std::get<1>(sorted_key_attrs_[i]);
-      int type = std::get<2>(sorted_key_attrs_[i]);
+      const int key  = std::get<0>(sorted_key_attrs_[i]);
+      const TShape& shape = std::get<1>(sorted_key_attrs_[i]);
+      const int type = std::get<2>(sorted_key_attrs_[i]);
+      const NDArrayStorageType stype = std::get<3>(sorted_key_attrs_[i]);
       auto& buf = merge_buf_[key];
       Context ctx;
       size_t min_size = std::numeric_limits<size_t>::max();
@@ -693,8 +753,12 @@ class CommDevice : public Comm {
           min_size = size;
         }
       }
-      buf.merged = NDArray(s, ctx, false, type);
-      ctx_info[ctx.dev_id].second += s.Size();
+      if (stype == kDefaultStorage) {
+        buf.merged = NDArray(shape, ctx, false, type);
+      } else {
+        buf.merged = NDArray(stype, shape, ctx, true, type);
+      }
+      ctx_info[ctx.dev_id].second += shape.Size();
     }
     inited_ = true;
   }
diff --git a/src/kvstore/kvstore_local.h b/src/kvstore/kvstore_local.h
index 1bb84fdc11..78b6c8f231 100644
--- a/src/kvstore/kvstore_local.h
+++ b/src/kvstore/kvstore_local.h
@@ -34,6 +34,7 @@
 #include <functional>
 #include <algorithm>
 #include "./comm.h"
+#include "./utils.h"
 
 namespace mxnet {
 namespace kvstore {
@@ -223,12 +224,12 @@ class KVStoreLocal : public KVStore {
                << "PullRowSparse expects row_sparse src NDArray";
       auto &target_val_rowids = grouped_val_rowids[i];
       const size_t num_vals = target_val_rowids.size();
-      for (size_t i = 0; i < num_vals; i++) {
-        auto &row_id = target_val_rowids[i].second;
-        NDArray indices(row_id.shape(), pinned_ctx_, false, mshadow::kInt64);
+      for (size_t j = 0; j < num_vals; j++) {
+        auto &row_id = target_val_rowids[j].second;
+        NDArray indices(row_id.shape(), local.ctx(), false, mshadow::kInt64);
         CopyFromTo(row_id, &indices, 0);
         Unique(&indices, priority);
-        target_val_rowids[i].second = indices;
+        target_val_rowids[j].second = indices;
       }
       comm_->BroadcastRowSparse(key, local, grouped_val_rowids[i], false, priority);
     }
@@ -354,29 +355,41 @@ class KVStoreLocal : public KVStore {
   }
 
   /**
-   * \brief sort and get unique values. Output is expected to be on cpu_pinned context
+   * \brief sort and get unique values.
    */
-  void Unique(NDArray *out, int priority = 0) {
-    CHECK_EQ(out->ctx().dev_mask(), pinned_ctx_.dev_mask())
-             << "Unique expects input with `pinned_ctx_`";
+  void Unique(NDArray *out, int priority) {
+    Resource rsc = ResourceManager::Get()->Request(out->ctx(),
+      ResourceRequest(ResourceRequest::kTempSpace));
     Engine::Get()->PushAsync(
-      [out](RunContext rctx, Engine::CallbackOnComplete on_complete) {
+      [rsc, out](RunContext rctx, Engine::CallbackOnComplete on_complete) {
         NDArray *output = out;
         CHECK_EQ(out->shape().ndim(), 1) << "Unique expects 1D inputs";
-        const auto size = out->shape()[0];
-        auto out_data = output->data();
-        MSHADOW_IDX_TYPE_SWITCH(out_data.type_flag_, IType, {
-          auto dptr = output->data().dptr<IType>();
-          common::ParallelSort(dptr, dptr + size, omp_get_max_threads());
-          auto num_unique_idx = std::unique(dptr, dptr + size) - dptr;
-          *output = output->Reshape(mshadow::Shape1(num_unique_idx));
-        });
+        nnvm::dim_t size = out->shape()[0];
+        switch (out->ctx().dev_mask()) {
+          case cpu::kDevMask: {
+            mshadow::Stream<cpu> *s = rctx.get_stream<cpu>();
+            UniqueImpl(rsc, s, output, size);
+            break;
+          }
+  #if MXNET_USE_CUDA
+          case gpu::kDevMask: {
+            mshadow::Stream<gpu> *s = rctx.get_stream<gpu>();
+            UniqueImpl(rsc, s, output, size);
+            // wait for GPU operations to complete
+            s->Wait();
+            break;
+          }
+  #endif
+          default:
+            LOG(FATAL) << "GPU not enabled.";
+        }
         on_complete();
-      }, pinned_ctx_, {}, {out->var()},
-      FnProperty::kCPUPrioritized, priority, PROFILER_MESSAGE("KVStoreUnique"));
+      }, out->ctx(), {}, {out->var(), rsc.var},
+      FnProperty::kNormal, priority, PROFILER_MESSAGE("KVStoreUnique"));
     out->WaitToRead();
   }
 
+
   /// reducer and broadcaster
   Comm* comm_;
   /// pinned context
diff --git a/src/kvstore/utils.cc b/src/kvstore/utils.cc
new file mode 100644
index 0000000000..c22553f3b6
--- /dev/null
+++ b/src/kvstore/utils.cc
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file utils.cc
+ * \brief cpu implementation of util functions
+ */
+
+#include "./utils.h"
+#include "../common/utils.h"
+
+namespace mxnet {
+namespace kvstore {
+
+
+template<>
+void UniqueImpl<cpu>(const Resource& rsc, mshadow::Stream<cpu> *s,
+                     NDArray *out, nnvm::dim_t size) {
+  MSHADOW_IDX_TYPE_SWITCH(out->data().type_flag_, IType, {
+    IType *dptr = out->data().dptr<IType>();
+    common::ParallelSort(dptr, dptr + size, omp_get_max_threads());
+    size_t num_unique_idx = std::unique(dptr, dptr + size) - dptr;
+    *out = out->Reshape(mshadow::Shape1(num_unique_idx));
+  });
+}
+
+
+}  // namespace kvstore
+}  // namespace mxnet
diff --git a/src/kvstore/utils.cu b/src/kvstore/utils.cu
new file mode 100644
index 0000000000..088a49efc8
--- /dev/null
+++ b/src/kvstore/utils.cu
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file utils.cu
+ * \brief gpu implementation of util functions
+ */
+#if defined(_MSC_VER) && __CUDACC_VER_MAJOR__ == 8 && __CUDACC_VER_BUILD__ != 44
+// Many CUDA 8 compilers other than V8.0.44 crash on Windows
+#pragma warning("Potential crash on CUDA compiler detected. Switching sorting from CUB to Thrust")
+#define SORT_WITH_THRUST
+#include <thrust/device_ptr.h>
+#include <thrust/sort.h>
+#include <thrust/system/cuda/execution_policy.h>
+#else
+#undef SORT_WITH_THRUST
+#endif
+#include "./utils.h"
+#include "../common/utils.h"
+#include <cub/cub.cuh>
+#include <mxnet/resource.h>
+
+namespace mxnet {
+namespace kvstore {
+
+
+template<typename IType>
+size_t UniqueImplGPU(const Resource& rsc, mshadow::Stream<gpu> *s,
+                     IType *dptr, nnvm::dim_t size) {
+#ifndef SORT_WITH_THRUST
+  size_t sort_temp_bytes = 0;
+  cub::DeviceRadixSort::SortKeys(NULL, sort_temp_bytes,
+    dptr, dptr, size, 0, sizeof(IType)*8, mshadow::Stream<gpu>::GetStream(s));
+  mshadow::Tensor<gpu, 1, char> sort_space = rsc
+    .get_space_typed<gpu, 1, char>(
+      mshadow::Shape1(sort_temp_bytes), s);
+  void *sort_temp_storage = static_cast<void*>(sort_space.dptr_);
+  cub::DeviceRadixSort::SortKeys(sort_temp_storage, sort_temp_bytes,
+    dptr, dptr, size, 0, sizeof(IType)*8, mshadow::Stream<gpu>::GetStream(s));
+#else
+  thrust::sort(thrust::cuda::par.on(mshadow::Stream<gpu>::GetStream(s)),
+    dptr, dptr + size, thrust::greater<IType>());
+#endif
+  size_t unique_temp_bytes = 0;
+  mshadow::Tensor<gpu, 1, char> dummy_space = rsc
+    .get_space_typed<gpu, 1, char>(
+      mshadow::Shape1(sizeof(size_t)), s);
+  size_t *dummy_ptr = reinterpret_cast<size_t*>(dummy_space.dptr_);
+  cub::DeviceSelect::Unique(NULL, unique_temp_bytes, dptr, dptr,
+    dummy_ptr, size, mshadow::Stream<gpu>::GetStream(s));
+
+  mshadow::Tensor<gpu, 1, char> unique_space = rsc
+    .get_space_typed<gpu, 1, char>(
+      mshadow::Shape1((unique_temp_bytes + sizeof(size_t) + 7) / 8 * 8), s);
+
+  void *unique_temp_storage = static_cast<void*>(
+    unique_space.dptr_);
+  size_t *d_num_selected_out = reinterpret_cast<size_t*>(
+    unique_space.dptr_ + (unique_temp_bytes + 7) / 8 * 8);
+
+  cub::DeviceSelect::Unique(unique_temp_storage, unique_temp_bytes, dptr, dptr,
+    d_num_selected_out, size, mshadow::Stream<gpu>::GetStream(s));
+
+  size_t num_selected_out = 0;
+  CUDA_CALL(cudaMemcpy(&num_selected_out, d_num_selected_out, sizeof(size_t),
+     cudaMemcpyDeviceToHost));
+  return num_selected_out;
+}
+
+/*!
+ * \brief sort and get unique values.
+ */
+template<>
+void UniqueImpl<gpu>(const Resource& rsc, mshadow::Stream<gpu> *s,
+                     NDArray *out, nnvm::dim_t size) {
+  MSHADOW_IDX_TYPE_SWITCH(out->data().type_flag_, IType, {
+    IType *dptr = out->data().dptr<IType>();
+    size_t num_selected_out = UniqueImplGPU(rsc, s, dptr, size);
+    *out = out->Reshape(mshadow::Shape1(num_selected_out));
+  });
+}
+
+
+}  // namespace kvstore
+}  // namespace mxnet
diff --git a/src/kvstore/utils.h b/src/kvstore/utils.h
new file mode 100644
index 0000000000..75473452ce
--- /dev/null
+++ b/src/kvstore/utils.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file utils.h
+ * \brief Basic utility functions.
+ */
+#ifndef MXNET_KVSTORE_UTILS_H_
+#define MXNET_KVSTORE_UTILS_H_
+
+#include <dmlc/logging.h>
+#include <mxnet/ndarray.h>
+#include <mxnet/resource.h>
+#include <utility>
+#include <vector>
+
+namespace mxnet {
+namespace kvstore {
+
+
+/*!
+ * \brief sort and get unique values.
+ */
+template<typename xpu>
+void UniqueImpl(const Resource& rsc, mshadow::Stream<xpu> *s,
+                NDArray *out, nnvm::dim_t size);
+
+}  // namespace kvstore
+}  // namespace mxnet
+
+#endif  // MXNET_KVSTORE_UTILS_H_
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 8a3bb8d59b..4db314f9cf 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -623,36 +623,66 @@ void ElementwiseSum(const std::vector<NDArray> &source, NDArray *out, int priori
   // important: callback must always capture by value
   NDArray ret = *out;
 
-  switch (out->ctx().dev_mask()) {
-    case cpu::kDevMask: {
-      Engine::Get()->PushSync([source, ret](RunContext ctx) {
-          std::vector<TBlob> source_tblob(source.size());
-          for (size_t i = 0; i < source.size(); ++i) {
-            source_tblob[i] = source[i].data();
-          }
-          TBlob tmp = ret.data();
-          ndarray::ElementwiseSum<cpu>(source_tblob, &tmp, ctx);
-        }, out->ctx(), const_vars, {ret.var()},
-        FnProperty::kNormal, priority, PROFILER_MESSAGE_FUNCNAME);
-      break;
+  const NDArrayStorageType stype = ret.storage_type();
+
+  if (stype == kDefaultStorage) {
+    switch (out->ctx().dev_mask()) {
+      case cpu::kDevMask: {
+        Engine::Get()->PushSync([source, ret](RunContext ctx) {
+            std::vector<TBlob> source_tblob(source.size());
+            for (size_t i = 0; i < source.size(); ++i) {
+              source_tblob[i] = source[i].data();
+            }
+            TBlob tmp = ret.data();
+            ndarray::ElementwiseSum<cpu>(source_tblob, &tmp, ctx);
+          }, out->ctx(), const_vars, {ret.var()},
+          FnProperty::kNormal, priority, PROFILER_MESSAGE_FUNCNAME);
+        break;
+      }
+#if MXNET_USE_CUDA
+      case gpu::kDevMask: {
+        Engine::Get()->PushSync([source, ret](RunContext ctx) {
+            std::vector<TBlob> source_tblob(source.size());
+            for (size_t i = 0; i < source.size(); ++i) {
+              source_tblob[i] = source[i].data();
+            }
+            TBlob tmp = ret.data();
+            ndarray::ElementwiseSum<gpu>(source_tblob, &tmp, ctx);
+            // Wait for the GPU kernel to complete
+            ctx.get_stream<gpu>()->Wait();
+          }, out->ctx(), const_vars, {ret.var()},
+          FnProperty::kNormal, priority, PROFILER_MESSAGE("DenseElementwiseSum"));
+        break;
+      }
+#endif
+      default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
     }
+  } else if (stype == kRowSparseStorage) {
+    Resource rsc = ResourceManager::Get()->Request(ret.ctx(),
+      ResourceRequest(ResourceRequest::kTempSpace));
+
+    Engine::Get()->PushSync(
+      [source, ret, rsc](RunContext rctx) {
+        NDArray result = ret;
+        switch (ret.ctx().dev_mask()) {
+          case cpu::kDevMask: {
+            mxnet::ndarray::ElementwiseSum(rctx.get_stream<cpu>(), rsc, source, &result);
+            break;
+          }
 #if MXNET_USE_CUDA
-    case gpu::kDevMask: {
-      Engine::Get()->PushSync([source, ret](RunContext ctx) {
-          std::vector<TBlob> source_tblob(source.size());
-          for (size_t i = 0; i < source.size(); ++i) {
-            source_tblob[i] = source[i].data();
+          case gpu::kDevMask: {
+            mxnet::ndarray::ElementwiseSum(rctx.get_stream<gpu>(), rsc, source, &result);
+            // wait for GPU operations to complete
+            rctx.get_stream<gpu>()->Wait();
+            break;
           }
-          TBlob tmp = ret.data();
-          ndarray::ElementwiseSum<gpu>(source_tblob, &tmp, ctx);
-          // Wait GPU kernel to complete
-          ctx.get_stream<gpu>()->Wait();
-        }, out->ctx(), const_vars, {ret.var()},
-        FnProperty::kNormal, priority, PROFILER_MESSAGE_FUNCNAME);
-      break;
-    }
 #endif
-    default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+          default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+        }
+      }, ret.ctx(), const_vars, {ret.var(), rsc.var},
+    FnProperty::kNormal, priority, PROFILER_MESSAGE("RowSparseElementwiseSum"));
+  } else {
+    LOG(FATAL) << "Not implemented for storage_type " << common::stype_string(stype);
   }
 }
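
The new kRowSparseStorage branch delegates to mxnet::ndarray::ElementwiseSum, which sums row_sparse inputs: the output holds the union of the inputs' row ids, and rows present in several inputs are added together. A rough NumPy sketch of that semantics, assuming a simplified (indices, data) pair representation (rowsparse_sum_reference is illustrative only, not the actual NDArray layout):

    import numpy as np

    def rowsparse_sum_reference(arrays, num_rows, row_len):
        # arrays: list of (indices, data) pairs, where data[k] is the dense row
        # stored for row id indices[k].
        if not arrays:
            return np.array([], dtype=np.int64), np.zeros((0, row_len))
        dense = np.zeros((num_rows, row_len))
        for indices, data in arrays:
            dense[np.asarray(indices)] += np.asarray(data)
        # The result keeps the union of all contributing row ids.
        kept = np.unique(np.concatenate([np.asarray(idx) for idx, _ in arrays]))
        return kept, dense[kept]

As in the dense branch, the GPU path waits on the stream inside the engine callback before it returns.
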
 
diff --git a/src/operator/operator_tune-inl.h b/src/operator/operator_tune-inl.h
index d4eec999f9..2dfc103b10 100644
--- a/src/operator/operator_tune-inl.h
+++ b/src/operator/operator_tune-inl.h
@@ -616,7 +616,7 @@ class UnaryOpTune : public OperatorTune<DType> {
    */
   template<typename OP>
   static duration_t GetBlankWorkloadEx() {
-    std::unique_ptr<DType> tmp(new DType[Super::WORKLOAD_COUNT]);
+    std::unique_ptr<DType[]> tmp(new DType[Super::WORKLOAD_COUNT]);
     DType *tmp_ptr = tmp.get();
     const Tick start = std::chrono::high_resolution_clock::now();
     for (size_t i = 0; i < Super::WORKLOAD_COUNT; ++i) {
diff --git a/tests/python/gpu/test_kvstore_gpu.py b/tests/python/gpu/test_kvstore_gpu.py
index 20528be664..3249c98b2d 100644
--- a/tests/python/gpu/test_kvstore_gpu.py
+++ b/tests/python/gpu/test_kvstore_gpu.py
@@ -26,9 +26,9 @@
 str_keys = ['b', 'c', 'd']
 
 
-def init_kv_with_str(stype='default'):
+def init_kv_with_str(stype='default', kv_type='local'):
     """init kv """
-    kv = mx.kv.create()
+    kv = mx.kv.create(kv_type)
     # single
     kv.init('a', mx.nd.zeros(shape, stype=stype))
     # list
@@ -36,34 +36,54 @@ def init_kv_with_str(stype='default'):
     return kv
 
 
-def test_row_sparse_pull():
-    kv = init_kv_with_str('row_sparse')
-    kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))
+def test_rsp_push_pull():
+    def check_rsp_push_pull(kv_type, is_push_cpu=True):
+        kv = init_kv_with_str('row_sparse', kv_type)
+        kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))
+        push_ctxs = [mx.cpu(i) if is_push_cpu else mx.gpu(i) for i in range(2)]
+        kv.push('e', [mx.nd.ones(shape, ctx=context).tostype('row_sparse') for context in push_ctxs])
 
-    def check_row_sparse_pull(kv, count, ctx=default_context()):
-        num_rows = shape[0]
-        vals = []
-        row_ids = []
-        all_row_ids = np.arange(num_rows)
-        for i in range(count):
-            vals.append(mx.nd.zeros(shape, ctx=ctx).tostype('row_sparse'))
-            row_id = np.random.randint(num_rows, size=num_rows)
-            row_ids.append(mx.nd.array(row_id, dtype='int64'))
-        row_ids_to_pull = row_ids[0] if len(row_ids) == 1 else row_ids
-        vals_to_pull = vals[0] if len(vals) == 1 else vals
+        def check_rsp_pull(kv, count, ctxs, is_same_rowid=False, use_slice=False):
+            num_rows = shape[0]
+            row_ids = []
+            all_row_ids = np.arange(num_rows)
+            vals = [mx.nd.sparse.zeros(shape=shape, ctx=ctxs[i], stype='row_sparse') for i in range(count)]
+            if is_same_rowid:
+                row_id = np.random.randint(num_rows, size=num_rows)
+                row_ids = [mx.nd.array(row_id, dtype='int64')] * count
+            elif use_slice:
+                total_row_ids = mx.nd.array(np.random.randint(num_rows, size=count*num_rows), dtype='int64')
+                row_ids = [total_row_ids[i*num_rows : (i+1)*num_rows] for i in range(count)]
+            else:
+                for i in range(count):
+                    row_id = np.random.randint(num_rows, size=num_rows)
+                    row_ids.append(mx.nd.array(row_id, dtype='int64'))
+            row_ids_to_pull = row_ids[0] if (len(row_ids) == 1 or is_same_rowid) else row_ids
+            vals_to_pull = vals[0] if len(vals) == 1 else vals
 
-        kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
-        for val, row_id in zip(vals, row_ids):
-            retained = val.asnumpy()
-            excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
-            for row in range(num_rows):
-                expected_val = np.zeros_like(retained[row])
-                expected_val += 0 if row in excluded_row_ids else 1
-                assert_almost_equal(retained[row], expected_val)
+            kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
+            for val, row_id in zip(vals, row_ids):
+                retained = val.asnumpy()
+                excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
+                for row in range(num_rows):
+                    expected_val = np.zeros_like(retained[row])
+                    expected_val += 0 if row in excluded_row_ids else 2
+                    assert_almost_equal(retained[row], expected_val)
 
-    check_row_sparse_pull(kv, 1, mx.gpu(0))
-    check_row_sparse_pull(kv, 4, mx.gpu(0))
+        check_rsp_pull(kv, 1, [mx.gpu(0)])
+        check_rsp_pull(kv, 1, [mx.cpu(0)])
+        check_rsp_pull(kv, 4, [mx.gpu(i//2) for i in range(4)])
+        check_rsp_pull(kv, 4, [mx.gpu(i//2) for i in range(4)], is_same_rowid=True)
+        check_rsp_pull(kv, 4, [mx.cpu(i) for i in range(4)])
+        check_rsp_pull(kv, 4, [mx.cpu(i) for i in range(4)], is_same_rowid=True)
+        check_rsp_pull(kv, 4, [mx.gpu(i//2) for i in range(4)], use_slice=True) 
+        check_rsp_pull(kv, 4, [mx.cpu(i) for i in range(4)], use_slice=True)
+
+    # This test fails intermittently and is temporarily disabled until it is fixed; tracked at https://github.com/apache/incubator-mxnet/issues/9384
+    # check_rsp_push_pull('local')
+    check_rsp_push_pull('device')
+    check_rsp_push_pull('device', is_push_cpu=False)
 
 
 if __name__ == '__main__':
-    test_row_sparse_pull()
+    test_rsp_push_pull()
diff --git a/tests/python/gpu/test_nccl.py b/tests/python/gpu/test_nccl.py
index fd27c0b58b..cc11eddebd 100644
--- a/tests/python/gpu/test_nccl.py
+++ b/tests/python/gpu/test_nccl.py
@@ -21,7 +21,18 @@
 
 shapes = [(10), (100), (1000), (10000), (100000), (2,2), (2,3,4,5,6,7,8)]
 keys = [1,2,3,4,5,6,7]
-gpus = range(1,1+len(mx.test_utils.list_gpus()))
+num_gpus = len(mx.test_utils.list_gpus())
+
+# TODO: The test is capped to run on at most 8 GPUs due to a bug in NCCL 2.1.
+# It is tracked in GitHub issue https://github.com/apache/incubator-mxnet/issues/9004.
+# Remove this constraint once that issue is fixed.
+
+if num_gpus > 8:
+    print("The machine has {} gpus. We will run the test on 8 gpus only due to a bug in NCCL 2.1.".format(num_gpus))
+    print("Please remove this limitation when issue #9004 is fixed.")
+    num_gpus = 8
+
+gpus = range(1,1+num_gpus)
 
 @unittest.skip("Test requires NCCL library installed and enabled during build")
 def test_nccl_pushpull():
diff --git a/tests/python/unittest/test_contrib_text.py b/tests/python/unittest/test_contrib_text.py
new file mode 100644
index 0000000000..f666888150
--- /dev/null
+++ b/tests/python/unittest/test_contrib_text.py
@@ -0,0 +1,727 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# 'License'); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+from __future__ import absolute_import
+from __future__ import print_function
+
+from collections import Counter
+import unittest
+
+from common import assertRaises
+from mxnet import ndarray as nd
+from mxnet.test_utils import *
+from mxnet.contrib.text import utils
+from mxnet.contrib.text.glossary import Glossary
+from mxnet.contrib.text.indexer import TokenIndexer
+from mxnet.contrib.text.embedding import TokenEmbedding, CustomEmbedding
+
+
+def _get_test_str_of_tokens(token_delim, seq_delim):
+    seq1 = token_delim + token_delim.join(['Life', 'is', 'great', '!']) \
+           + token_delim + seq_delim
+    seq2 = token_delim + token_delim.join(['life', 'is', 'good', '.']) \
+           + token_delim + seq_delim
+    seq3 = token_delim + token_delim.join(['life', "isn't", 'bad', '.']) \
+           + token_delim + seq_delim
+    seqs = seq1 + seq2 + seq3
+    return seqs
+
+
+def _test_count_tokens_from_str_with_delims(token_delim, seq_delim):
+    source_str = _get_test_str_of_tokens(token_delim, seq_delim)
+
+    cnt1 = utils.count_tokens_from_str(source_str, token_delim, seq_delim,
+                                       to_lower=False)
+    assert cnt1 == Counter(
+        {'is': 2, 'life': 2, '.': 2, 'Life': 1, 'great': 1, '!': 1, 'good': 1,
+         "isn't": 1, 'bad': 1})
+
+    cnt2 = utils.count_tokens_from_str(source_str, token_delim, seq_delim,
+                                       to_lower=True)
+    assert cnt2 == Counter(
+        {'life': 3, 'is': 2, '.': 2, 'great': 1, '!': 1, 'good': 1,
+         "isn't": 1, 'bad': 1})
+
+    counter_to_update = Counter({'life': 2})
+
+    cnt3 = utils.count_tokens_from_str(
+        source_str, token_delim, seq_delim, to_lower=False,
+        counter_to_update=counter_to_update.copy())
+    assert cnt3 == Counter(
+        {'is': 2, 'life': 4, '.': 2, 'Life': 1, 'great': 1, '!': 1, 'good': 1,
+         "isn't": 1, 'bad': 1})
+
+    cnt4 = utils.count_tokens_from_str(
+        source_str, token_delim, seq_delim, to_lower=True,
+        counter_to_update=counter_to_update.copy())
+    assert cnt4 == Counter(
+        {'life': 5, 'is': 2, '.': 2, 'great': 1, '!': 1, 'good': 1,
+         "isn't": 1, 'bad': 1})
+
+
+def test_count_tokens_from_str():
+    _test_count_tokens_from_str_with_delims(' ', '\n')
+    _test_count_tokens_from_str_with_delims('IS', 'LIFE')
+
+
+def test_tokens_to_indices():
+    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    indexer = TokenIndexer(counter, most_freq_count=None, min_freq=1,
+                           unknown_token='<unk>', reserved_tokens=None)
+
+    i1 = indexer.to_indices('c')
+    assert i1 == 1
+
+    i2 = indexer.to_indices(['c'])
+    assert i2 == [1]
+
+    i3 = indexer.to_indices(['<unk>', 'non-exist'])
+    assert i3 == [0, 0]
+
+    i4 = indexer.to_indices(['a', 'non-exist', 'a', 'b'])
+    assert i4 == [3, 0, 3, 2]
+
+
+def test_indices_to_tokens():
+    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    indexer = TokenIndexer(counter, most_freq_count=None, min_freq=1,
+                           unknown_token='<unknown>', reserved_tokens=None)
+
+    i1 = indexer.to_tokens(1)
+    assert i1 == 'c'
+
+    i2 = indexer.to_tokens([1])
+    assert i2 == ['c']
+
+    i3 = indexer.to_tokens([0, 0])
+    assert i3 == ['<unknown>', '<unknown>']
+
+    i4 = indexer.to_tokens([3, 0, 3, 2])
+    assert i4 == ['a', '<unknown>', 'a', 'b']
+
+    assertRaises(ValueError, indexer.to_tokens, 100)
+
+def test_download_embed():
+    @TokenEmbedding.register
+    class Test(TokenEmbedding):
+        pretrained_file_name_sha1 = \
+            {'embedding_test.vec': '29b9a6511cf4b5aae293c44a9ec1365b74f2a2f8'} # 33 bytes
+        namespace = 'test'
+
+        def __init__(self, embedding_root='embeddings',
+                     init_unknown_vec=nd.zeros, **kwargs):
+            pretrained_file_name = 'embedding_test.vec'
+            Test._check_pretrained_file_names(pretrained_file_name)
+
+            super(Test, self).__init__(**kwargs)
+
+            pretrained_file_path = Test._get_pretrained_file(embedding_root,
+                                                             pretrained_file_name)
+
+            self._load_embedding(pretrained_file_path, ' ', init_unknown_vec)
+
+    test_embed = TokenEmbedding.create('test')
+    assert test_embed.token_to_idx['hello'] == 1
+    assert test_embed.token_to_idx['world'] == 2
+    assert_almost_equal(test_embed.idx_to_vec[1].asnumpy(), (nd.arange(5) + 1).asnumpy())
+    assert_almost_equal(test_embed.idx_to_vec[2].asnumpy(), (nd.arange(5) + 6).asnumpy())
+    assert_almost_equal(test_embed.idx_to_vec[0].asnumpy(), nd.zeros((5,)).asnumpy())
+
+
+
+def _mk_my_pretrain_file(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.1', '0.2', '0.3', '0.4', '0.5']) + '\n'
+    seq2 = token_delim.join(['b', '0.6', '0.7', '0.8', '0.9', '1.0']) + '\n'
+    seqs = seq1 + seq2
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def _mk_my_pretrain_file2(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.01', '0.02', '0.03', '0.04',
+                             '0.05']) + '\n'
+    seq2 = token_delim.join(['c', '0.06', '0.07', '0.08', '0.09', '0.1']) + '\n'
+    seqs = seq1 + seq2
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def _mk_my_pretrain_file3(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.1', '0.2', '0.3', '0.4', '0.5']) + '\n'
+    seq2 = token_delim.join(['b', '0.6', '0.7', '0.8', '0.9', '1.0']) + '\n'
+    seq3 = token_delim.join(['<unk1>', '1.1', '1.2', '1.3', '1.4',
+                             '1.5']) + '\n'
+    seqs = seq1 + seq2 + seq3
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def _mk_my_pretrain_file4(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.01', '0.02', '0.03', '0.04',
+                             '0.05']) + '\n'
+    seq2 = token_delim.join(['c', '0.06', '0.07', '0.08', '0.09',
+                             '0.1']) + '\n'
+    seq3 = token_delim.join(['<unk2>', '0.11', '0.12', '0.13', '0.14',
+                             '0.15']) + '\n'
+    seqs = seq1 + seq2 + seq3
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def _mk_my_invalid_pretrain_file(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.1', '0.2', '0.3', '0.4', '0.5']) + '\n'
+    seq2 = token_delim.join(['b', '0.6', '0.7', '0.8', '0.9', '1.0']) + '\n'
+    seq3 = token_delim.join(['c']) + '\n'
+    seqs = seq1 + seq2 + seq3
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def _mk_my_invalid_pretrain_file2(path, token_delim, pretrain_file):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    seq1 = token_delim.join(['a', '0.1', '0.2', '0.3', '0.4', '0.5']) + '\n'
+    seq2 = token_delim.join(['b', '0.6', '0.7', '0.8', '0.9', '1.0']) + '\n'
+    seq3 = token_delim.join(['c', '0.6', '0.7', '0.8']) + '\n'
+    seqs = seq1 + seq2 + seq3
+    with open(os.path.join(path, pretrain_file), 'w') as fout:
+        fout.write(seqs)
+
+
+def test_custom_embed():
+    embed_root = 'embeddings'
+    embed_name = 'my_embed'
+    elem_delim = '\t'
+    pretrain_file = 'my_pretrain_file.txt'
+
+    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
+                         pretrain_file)
+
+    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)
+
+    my_embed = CustomEmbedding(pretrain_file_path, elem_delim)
+
+    assert len(my_embed) == 3
+    assert my_embed.vec_len == 5
+    assert my_embed.token_to_idx['a'] == 1
+    assert my_embed.idx_to_token[1] == 'a'
+
+    first_vec = my_embed.idx_to_vec[0]
+    assert_almost_equal(first_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))
+
+    unk_vec = my_embed.get_vecs_by_tokens('A')
+    assert_almost_equal(unk_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))
+
+    a_vec = my_embed.get_vecs_by_tokens('A', lower_case_backup=True)
+    assert_almost_equal(a_vec.asnumpy(), np.array([0.1, 0.2, 0.3, 0.4, 0.5]))
+
+    unk_vecs = my_embed.get_vecs_by_tokens(['<un...@unk>', '<un...@unk>'])
+    assert_almost_equal(unk_vecs.asnumpy(),
+                        np.array([[0, 0, 0, 0, 0],
+                                  [0, 0, 0, 0, 0]]))
+
+    # Test loaded unknown vectors.
+    pretrain_file2 = 'my_pretrain_file2.txt'
+    _mk_my_pretrain_file3(os.path.join(embed_root, embed_name), elem_delim,
+                          pretrain_file2)
+    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file2)
+    my_embed2 = CustomEmbedding(pretrain_file_path, elem_delim,
+                                init_unknown_vec=nd.ones,
+                                unknown_token='<unk>')
+    unk_vec2 = my_embed2.get_vecs_by_tokens('<unk>')
+    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))
+    unk_vec2 = my_embed2.get_vecs_by_tokens('<un...@unk>')
+    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))
+
+    my_embed3 = CustomEmbedding(pretrain_file_path, elem_delim,
+                                init_unknown_vec=nd.ones,
+                                unknown_token='<unk1>')
+    unk_vec3 = my_embed3.get_vecs_by_tokens('<unk1>')
+    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
+    unk_vec3 = my_embed3.get_vecs_by_tokens('<un...@unk>')
+    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
+
+    # Test error handling.
+    invalid_pretrain_file = 'invalid_pretrain_file.txt'
+    _mk_my_invalid_pretrain_file(os.path.join(embed_root, embed_name),
+                                 elem_delim, invalid_pretrain_file)
+    pretrain_file_path = os.path.join(embed_root, embed_name,
+                                      invalid_pretrain_file)
+    assertRaises(AssertionError, CustomEmbedding, pretrain_file_path,
+                 elem_delim)
+
+    invalid_pretrain_file2 = 'invalid_pretrain_file2.txt'
+    _mk_my_invalid_pretrain_file2(os.path.join(embed_root, embed_name),
+                                  elem_delim, invalid_pretrain_file2)
+    pretrain_file_path = os.path.join(embed_root, embed_name,
+                                      invalid_pretrain_file2)
+    assertRaises(AssertionError, CustomEmbedding, pretrain_file_path,
+                 elem_delim)
+
+
+def test_token_indexer():
+    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    g1 = TokenIndexer(counter, most_freq_count=None, min_freq=1,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g1) == 5
+    assert g1.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3,
+                               'some_word$': 4}
+    assert g1.idx_to_token[1] == 'c'
+    assert g1.unknown_token == '<unk>'
+    assert g1.reserved_tokens is None
+
+    g2 = TokenIndexer(counter, most_freq_count=None, min_freq=2,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g2) == 3
+    assert g2.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
+    assert g2.idx_to_token[1] == 'c'
+    assert g2.unknown_token == '<unk>'
+    assert g2.reserved_tokens is None
+
+    g3 = TokenIndexer(counter, most_freq_count=None, min_freq=100,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g3) == 1
+    assert g3.token_to_idx == {'<unk>': 0}
+    assert g3.idx_to_token[0] == '<unk>'
+    assert g3.unknown_token == '<unk>'
+    assert g3.reserved_tokens is None
+
+    g4 = TokenIndexer(counter, most_freq_count=2, min_freq=1,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g4) == 3
+    assert g4.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
+    assert g4.idx_to_token[1] == 'c'
+    assert g4.unknown_token == '<unk>'
+    assert g4.reserved_tokens is None
+
+    g5 = TokenIndexer(counter, most_freq_count=3, min_freq=1,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g5) == 4
+    assert g5.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3}
+    assert g5.idx_to_token[1] == 'c'
+    assert g5.unknown_token == '<unk>'
+    assert g5.reserved_tokens is None
+
+    g6 = TokenIndexer(counter, most_freq_count=100, min_freq=1,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g6) == 5
+    assert g6.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3,
+                               'some_word$': 4}
+    assert g6.idx_to_token[1] == 'c'
+    assert g6.unknown_token == '<unk>'
+    assert g6.reserved_tokens is None
+
+    g7 = TokenIndexer(counter, most_freq_count=1, min_freq=2,
+                      unknown_token='<unk>', reserved_tokens=None)
+    assert len(g7) == 2
+    assert g7.token_to_idx == {'<unk>': 0, 'c': 1}
+    assert g7.idx_to_token[1] == 'c'
+    assert g7.unknown_token == '<unk>'
+    assert g7.reserved_tokens is None
+
+    assertRaises(AssertionError, TokenIndexer, counter, most_freq_count=None,
+                 min_freq=0, unknown_token='<unknown>',
+                 reserved_tokens=['b'])
+
+    assertRaises(AssertionError, TokenIndexer, counter, most_freq_count=None,
+                 min_freq=1, unknown_token='<unknown>',
+                 reserved_tokens=['b', 'b'])
+
+    assertRaises(AssertionError, TokenIndexer, counter, most_freq_count=None,
+                 min_freq=1, unknown_token='<unknown>',
+                 reserved_tokens=['b', '<unknown>'])
+
+    g8 = TokenIndexer(counter, most_freq_count=None, min_freq=1,
+                      unknown_token='<unknown>', reserved_tokens=['b'])
+    assert len(g8) == 5
+    assert g8.token_to_idx == {'<unknown>': 0, 'b': 1, 'c': 2, 'a': 3,
+                               'some_word$': 4}
+    assert g8.idx_to_token[1] == 'b'
+    assert g8.unknown_token == '<unknown>'
+    assert g8.reserved_tokens == ['b']
+
+    g9 = TokenIndexer(counter, most_freq_count=None, min_freq=2,
+                      unknown_token='<unk>', reserved_tokens=['b', 'a'])
+    assert len(g9) == 4
+    assert g9.token_to_idx == {'<unk>': 0, 'b': 1, 'a': 2, 'c': 3}
+    assert g9.idx_to_token[1] == 'b'
+    assert g9.unknown_token == '<unk>'
+    assert g9.reserved_tokens == ['b', 'a']
+
+    g10 = TokenIndexer(counter, most_freq_count=None, min_freq=100,
+                       unknown_token='<unk>', reserved_tokens=['b', 'c'])
+    assert len(g10) == 3
+    assert g10.token_to_idx == {'<unk>': 0, 'b': 1, 'c': 2}
+    assert g10.idx_to_token[1] == 'b'
+    assert g10.unknown_token == '<unk>'
+    assert g10.reserved_tokens == ['b', 'c']
+
+    g11 = TokenIndexer(counter, most_freq_count=1, min_freq=2,
+                       unknown_token='<unk>', reserved_tokens=['<pad>', 'b'])
+    assert len(g11) == 4
+    assert g11.token_to_idx == {'<unk>': 0, '<pad>': 1, 'b': 2, 'c': 3}
+    assert g11.idx_to_token[1] == '<pad>'
+    assert g11.unknown_token == '<unk>'
+    assert g11.reserved_tokens == ['<pad>', 'b']
+
+    g12 = TokenIndexer(counter, most_freq_count=None, min_freq=2,
+                       unknown_token='b', reserved_tokens=['<pad>'])
+    assert len(g12) == 3
+    assert g12.token_to_idx == {'b': 0, '<pad>': 1, 'c': 2}
+    assert g12.idx_to_token[1] == '<pad>'
+    assert g12.unknown_token == 'b'
+    assert g12.reserved_tokens == ['<pad>']
+
+    g13 = TokenIndexer(counter, most_freq_count=None, min_freq=2,
+                       unknown_token='a', reserved_tokens=['<pad>'])
+    assert len(g13) == 4
+    assert g13.token_to_idx == {'a': 0, '<pad>': 1, 'c': 2, 'b': 3}
+    assert g13.idx_to_token[1] == '<pad>'
+    assert g13.unknown_token == 'a'
+    assert g13.reserved_tokens == ['<pad>']
+
+    counter_tuple = Counter([('a', 'a'), ('b', 'b'), ('b', 'b'),
+                             ('c', 'c'), ('c', 'c'), ('c', 'c'),
+                             ('some_word$', 'some_word$')])
+
+    g14 = TokenIndexer(counter_tuple, most_freq_count=None, min_freq=1,
+                       unknown_token=('<unk>', '<unk>'), reserved_tokens=None)
+    assert len(g14) == 5
+    assert g14.token_to_idx == {('<unk>', '<unk>'): 0, ('c', 'c'): 1,
+                                ('b', 'b'): 2, ('a', 'a'): 3,
+                                ('some_word$', 'some_word$'): 4}
+    assert g14.idx_to_token[1] == ('c', 'c')
+    assert g14.unknown_token == ('<unk>', '<unk>')
+    assert g14.reserved_tokens is None
+
+
+def test_glossary_with_one_embed():
+    embed_root = 'embeddings'
+    embed_name = 'my_embed'
+    elem_delim = '\t'
+    pretrain_file = 'my_pretrain_file1.txt'
+
+    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
+                         pretrain_file)
+
+    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)
+
+    my_embed = CustomEmbedding(pretrain_file_path, elem_delim,
+                               init_unknown_vec=nd.ones)
+
+    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    g1 = Glossary(counter, my_embed, most_freq_count=None, min_freq=1,
+                  unknown_token='<unk>', reserved_tokens=['<pad>'])
+
+    assert g1.token_to_idx == {'<unk>': 0, '<pad>': 1, 'c': 2, 'b': 3, 'a': 4,
+                               'some_word$': 5}
+    assert g1.idx_to_token == ['<unk>', '<pad>', 'c', 'b', 'a', 'some_word$']
+
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [0.6, 0.7, 0.8, 0.9, 1],
+                                  [0.1, 0.2, 0.3, 0.4, 0.5],
+                                  [1, 1, 1, 1, 1]])
+                        )
+
+    assert g1.vec_len == 5
+    assert g1.reserved_tokens == ['<pad>']
+
+    assert_almost_equal(g1.get_vecs_by_tokens('c').asnumpy(),
+                        np.array([1, 1, 1, 1, 1])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['c']).asnumpy(),
+                        np.array([[1, 1, 1, 1, 1]])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['a', 'not_exist']).asnumpy(),
+                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+                                  [1, 1, 1, 1, 1]])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['a', 'b']).asnumpy(),
+                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+                                  [0.6, 0.7, 0.8, 0.9, 1]])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['A', 'b']).asnumpy(),
+                        np.array([[1, 1, 1, 1, 1],
+                                  [0.6, 0.7, 0.8, 0.9, 1]])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['A', 'b'],
+                                              lower_case_backup=True).asnumpy(),
+                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
+                                  [0.6, 0.7, 0.8, 0.9, 1]])
+                        )
+
+    g1.update_token_vectors(['a', 'b'],
+                            nd.array([[2, 2, 2, 2, 2],
+                                      [3, 3, 3, 3, 3]])
+                            )
+
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1]])
+                        )
+
+    assertRaises(ValueError, g1.update_token_vectors, 'unknown$$$',
+                 nd.array([0, 0, 0, 0, 0]))
+
+    assertRaises(AssertionError, g1.update_token_vectors, '<unk>',
+                 nd.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))
+
+    assertRaises(AssertionError, g1.update_token_vectors, '<unk>',
+                 nd.array([0]))
+
+    g1.update_token_vectors(['<unk>'],
+                            nd.array([0, 0, 0, 0, 0])
+                            )
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[0, 0, 0, 0, 0],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1]])
+                        )
+    g1.update_token_vectors(['<unk>'],
+                            nd.array([[10, 10, 10, 10, 10]])
+                            )
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[10, 10, 10, 10, 10],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1]])
+                        )
+    g1.update_token_vectors('<unk>',
+                            nd.array([0, 0, 0, 0, 0])
+                            )
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[0, 0, 0, 0, 0],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1]])
+                        )
+    g1.update_token_vectors('<unk>',
+                            nd.array([[10, 10, 10, 10, 10]])
+                            )
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[10, 10, 10, 10, 10],
+                                  [1, 1, 1, 1, 1],
+                                  [1, 1, 1, 1, 1],
+                                  [3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1]])
+                        )
+
+
+def test_glossary_with_two_embeds():
+    embed_root = '.'
+    embed_name = 'my_embed'
+    elem_delim = '\t'
+    pretrain_file1 = 'my_pretrain_file1.txt'
+    pretrain_file2 = 'my_pretrain_file2.txt'
+
+    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
+                         pretrain_file1)
+    _mk_my_pretrain_file2(os.path.join(embed_root, embed_name), elem_delim,
+                          pretrain_file2)
+
+    pretrain_file_path1 = os.path.join(embed_root, embed_name, pretrain_file1)
+    pretrain_file_path2 = os.path.join(embed_root, embed_name, pretrain_file2)
+
+    my_embed1 = CustomEmbedding(pretrain_file_path1, elem_delim,
+                                init_unknown_vec=nd.ones)
+    my_embed2 = CustomEmbedding(pretrain_file_path2, elem_delim)
+
+    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    g1 = Glossary(counter, [my_embed1, my_embed2], most_freq_count=None,
+                  min_freq=1, unknown_token='<unk>', reserved_tokens=None)
+
+    assert g1.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3,
+                               'some_word$': 4}
+    assert g1.idx_to_token == ['<unk>', 'c', 'b', 'a', 'some_word$']
+
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                                  [1, 1, 1, 1, 1, 0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [0.6, 0.7, 0.8, 0.9, 1, 0, 0, 0, 0, 0],
+                                  [0.1, 0.2, 0.3, 0.4, 0.5,
+                                   0.01, 0.02, 0.03, 0.04, 0.05],
+                                  [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])
+                        )
+
+    assert g1.vec_len == 10
+    assert g1.reserved_tokens is None
+    assert_almost_equal(g1.get_vecs_by_tokens('c').asnumpy(),
+                        np.array([1, 1, 1, 1, 1, 0.06, 0.07, 0.08, 0.09, 0.1])
+                        )
+
+    assert_almost_equal(g1.get_vecs_by_tokens(['b', 'not_exist']).asnumpy(),
+                        np.array([[0.6, 0.7, 0.8, 0.9, 1, 0, 0, 0, 0, 0],
+                                  [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])
+                        )
+
+    g1.update_token_vectors(['a', 'b'],
+                            nd.array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                                      [3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])
+                            )
+    assert_almost_equal(g1.idx_to_vec.asnumpy(),
+                        np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                                  [1, 1, 1, 1, 1, 0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+                                  [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                                  [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])
+                        )
+
+    # Test loaded unknown tokens
+    pretrain_file3 = 'my_pretrain_file3.txt'
+    pretrain_file4 = 'my_pretrain_file4.txt'
+
+    _mk_my_pretrain_file3(os.path.join(embed_root, embed_name), elem_delim,
+                          pretrain_file3)
+    _mk_my_pretrain_file4(os.path.join(embed_root, embed_name), elem_delim,
+                          pretrain_file4)
+
+    pretrain_file_path3 = os.path.join(embed_root, embed_name, pretrain_file3)
+    pretrain_file_path4 = os.path.join(embed_root, embed_name, pretrain_file4)
+
+    my_embed3 = CustomEmbedding(pretrain_file_path3, elem_delim,
+                                init_unknown_vec=nd.ones,
+                                unknown_token='<unk1>')
+    my_embed4 = CustomEmbedding(pretrain_file_path4, elem_delim,
+                                unknown_token='<unk2>')
+
+    g2 = Glossary(counter, [my_embed3, my_embed4], most_freq_count=None,
+                  min_freq=1, unknown_token='<unk>', reserved_tokens=None)
+    assert_almost_equal(g2.idx_to_vec.asnumpy(),
+                        np.array([[1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [0.6, 0.7, 0.8, 0.9, 1,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [0.1, 0.2, 0.3, 0.4, 0.5,
+                                   0.01, 0.02, 0.03, 0.04, 0.05],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15]])
+                        )
+
+    g3 = Glossary(counter, [my_embed3, my_embed4], most_freq_count=None,
+                  min_freq=1, unknown_token='<unk1>', reserved_tokens=None)
+    assert_almost_equal(g3.idx_to_vec.asnumpy(),
+                        np.array([[1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [0.6, 0.7, 0.8, 0.9, 1,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [0.1, 0.2, 0.3, 0.4, 0.5,
+                                   0.01, 0.02, 0.03, 0.04, 0.05],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15]])
+                        )
+
+    g4 = Glossary(counter, [my_embed3, my_embed4], most_freq_count=None,
+                  min_freq=1, unknown_token='<unk2>', reserved_tokens=None)
+    assert_almost_equal(g4.idx_to_vec.asnumpy(),
+                        np.array([[1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [0.6, 0.7, 0.8, 0.9, 1,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [0.1, 0.2, 0.3, 0.4, 0.5,
+                                   0.01, 0.02, 0.03, 0.04, 0.05],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15]])
+                        )
+
+    counter2 = Counter(['b', 'b', 'c', 'c', 'c', 'some_word$'])
+
+    g5 = Glossary(counter2, [my_embed3, my_embed4], most_freq_count=None,
+                  min_freq=1, unknown_token='a', reserved_tokens=None)
+    assert g5.token_to_idx == {'a': 0, 'c': 1, 'b': 2, 'some_word$': 3}
+    assert g5.idx_to_token == ['a', 'c', 'b', 'some_word$']
+    assert_almost_equal(g5.idx_to_vec.asnumpy(),
+                        np.array([[1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.06, 0.07, 0.08, 0.09, 0.1],
+                                  [0.6, 0.7, 0.8, 0.9, 1,
+                                   0.11, 0.12, 0.13, 0.14, 0.15],
+                                  [1.1, 1.2, 1.3, 1.4, 1.5,
+                                   0.11, 0.12, 0.13, 0.14, 0.15]])
+                        )
+
+
+def test_get_embedding_names_and_pretrain_files():
+    assert len(TokenEmbedding.get_embedding_and_pretrained_file_names(
+        embedding_name='fasttext')) == 294
+
+    assert len(TokenEmbedding.get_embedding_and_pretrained_file_names(
+        embedding_name='glove')) == 10
+
+    reg = TokenEmbedding.get_embedding_and_pretrained_file_names(
+        embedding_name=None)
+
+    assert len(reg['glove']) == 10
+    assert len(reg['fasttext']) == 294
+
+    assertRaises(KeyError,
+                 TokenEmbedding.get_embedding_and_pretrained_file_names,
+                 'unknown$$')
+
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule()
diff --git a/tests/python/unittest/test_gluon_contrib.py b/tests/python/unittest/test_gluon_contrib.py
index 07b8956988..1a188c34b1 100644
--- a/tests/python/unittest/test_gluon_contrib.py
+++ b/tests/python/unittest/test_gluon_contrib.py
@@ -18,6 +18,8 @@
 from __future__ import print_function
 import mxnet as mx
 from mxnet.gluon import contrib
+from mxnet.gluon import nn
+from mxnet.gluon.contrib.nn import Concurrent, HybridConcurrent, Identity
 from mxnet.test_utils import almost_equal
 import numpy as np
 from numpy.testing import assert_allclose
@@ -138,6 +140,39 @@ def check_vardrop(drop_inputs, drop_states, drop_outputs):
     check_vardrop(0.5, 0, 0.5)
 
 
+def test_concurrent():
+    model = HybridConcurrent(axis=1)
+    model.add(nn.Dense(128, activation='tanh', in_units=10))
+    model.add(nn.Dense(64, activation='tanh', in_units=10))
+    model.add(nn.Dense(32, in_units=10))
+    model2 = Concurrent(axis=1)
+    model2.add(nn.Dense(128, activation='tanh', in_units=10))
+    model2.add(nn.Dense(64, activation='tanh', in_units=10))
+    model2.add(nn.Dense(32, in_units=10))
+
+    # symbol
+    x = mx.sym.var('data')
+    y = model(x)
+    assert len(y.list_arguments()) == 7
+
+    # ndarray
+    model.initialize(mx.init.Xavier(magnitude=2.24))
+    model2.initialize(mx.init.Xavier(magnitude=2.24))
+    x = model(mx.nd.zeros((32, 10)))
+    x2 = model2(mx.nd.zeros((32, 10)))
+    assert x.shape == (32, 224)
+    assert x2.shape == (32, 224)
+    x.wait_to_read()
+    x2.wait_to_read()
+
+
+def test_identity():
+    model = Identity()
+    x = mx.nd.random.uniform(shape=(128, 33, 64))
+    mx.test_utils.assert_almost_equal(model(x).asnumpy(),
+                                      x.asnumpy())
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
diff --git a/tests/python/unittest/test_gluon_model_zoo.py b/tests/python/unittest/test_gluon_model_zoo.py
index 39d3b19c36..022f758ea0 100644
--- a/tests/python/unittest/test_gluon_model_zoo.py
+++ b/tests/python/unittest/test_gluon_model_zoo.py
@@ -17,39 +17,12 @@
 
 from __future__ import print_function
 import mxnet as mx
-from mxnet.gluon import nn
-from mxnet.gluon.model_zoo.custom_layers import HybridConcurrent, Identity
 from mxnet.gluon.model_zoo.vision import get_model
 import sys
 
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
-def test_concurrent():
-    model = HybridConcurrent(concat_dim=1)
-    model.add(nn.Dense(128, activation='tanh', in_units=10))
-    model.add(nn.Dense(64, activation='tanh', in_units=10))
-    model.add(nn.Dense(32, in_units=10))
-
-    # symbol
-    x = mx.sym.var('data')
-    y = model(x)
-    assert len(y.list_arguments()) == 7
-
-    # ndarray
-    model.collect_params().initialize(mx.init.Xavier(magnitude=2.24))
-    x = model(mx.nd.zeros((32, 10)))
-    assert x.shape == (32, 224)
-    x.wait_to_read()
-
-
-def test_identity():
-    model = Identity()
-    x = mx.nd.random.uniform(shape=(128, 33, 64))
-    mx.test_utils.assert_almost_equal(model(x).asnumpy(),
-                                      x.asnumpy())
-
-
 def test_models():
     all_models = ['resnet18_v1', 'resnet34_v1', 'resnet50_v1', 'resnet101_v1', 'resnet152_v1',
                   'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet101_v2', 'resnet152_v2',
@@ -62,7 +35,7 @@ def test_models():
     pretrained_to_test = set(['squeezenet1.1'])
 
     for model_name in all_models:
-        test_pretrain = model_name in pretrained_to_test
+        test_pretrain = True #model_name in pretrained_to_test
         model = get_model(model_name, pretrained=test_pretrain, root='model/')
         data_shape = (2, 3, 224, 224) if 'inception' not in model_name else (2, 3, 299, 299)
         eprint('testing forward for %s'%model_name)
diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py
index e044df0705..9fb303352b 100644
--- a/tests/python/unittest/test_loss.py
+++ b/tests/python/unittest/test_loss.py
@@ -97,6 +97,14 @@ def test_bce_loss():
             eval_metric=mx.metric.Loss(), optimizer='adam',
             initializer=mx.init.Xavier(magnitude=2))
     assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01
+    # Test against npy
+    data = mx.random.uniform(-5, 5, shape=(10,))
+    label = mx.random.uniform(0, 1, shape=(10,))
+    mx_bce_loss = Loss(data, label).asnumpy()
+    prob_npy = 1.0 / (1.0 + np.exp(-data.asnumpy()))
+    label_npy = label.asnumpy()
+    npy_bce_loss = - label_npy * np.log(prob_npy) - (1 - label_npy) * np.log(1 - prob_npy)
+    assert_almost_equal(mx_bce_loss, npy_bce_loss)
 
 def test_bce_equal_ce2():
     N = 100
@@ -107,6 +115,15 @@ def test_bce_equal_ce2():
     label = mx.nd.round(mx.random.uniform(0, 1, shape=(N, 1)))
     assert_almost_equal(loss1(out1, label).asnumpy(), loss2(out2, label).asnumpy())
 
+def test_logistic_loss_equal_bce():
+    N = 100
+    loss_binary = gluon.loss.LogisticLoss(label_format='binary')
+    loss_signed = gluon.loss.LogisticLoss(label_format='signed')
+    loss_bce = gluon.loss.SigmoidBCELoss(from_sigmoid=False)
+    data = mx.random.uniform(-10, 10, shape=(N, 1))
+    label = mx.nd.round(mx.random.uniform(0, 1, shape=(N, 1)))
+    assert_almost_equal(loss_binary(data, label).asnumpy(), loss_bce(data, label).asnumpy())
+    assert_almost_equal(loss_signed(data, 2 * label - 1).asnumpy(), loss_bce(data, label).asnumpy())
 
 def test_kl_loss():
     np.random.seed(1234)

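The new test_logistic_loss_equal_bce relies on the identity -y*log(sigmoid(x)) - (1-y)*log(1-sigmoid(x)) = log(1 + exp(-t*x)) with t = 2*y - 1, i.e. sigmoid binary cross-entropy on {0, 1} labels equals the logistic loss on {-1, +1} labels. A small standalone NumPy check of that identity, independent of the gluon loss classes:

    import numpy as np

    x = np.random.uniform(-10, 10, size=100)         # raw scores (pre-sigmoid)
    y = np.round(np.random.uniform(0, 1, size=100))  # labels in {0, 1}
    t = 2 * y - 1                                     # the same labels in {-1, +1}

    sigmoid = 1.0 / (1.0 + np.exp(-x))
    bce = -y * np.log(sigmoid) - (1 - y) * np.log(1 - sigmoid)
    logistic = np.log(1 + np.exp(-t * x))

    assert np.allclose(bce, logistic)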

 
