Posted to commits@mxnet.apache.org by lx...@apache.org on 2017/07/07 15:58:54 UTC

[41/50] [abbrv] incubator-mxnet-test git commit: reworked cachedop. (#6910)

reworked cachedop. (#6910)

The kvstore is now indexed by strings instead of ints.
Added two more optimizers (AdaMax and Nadam) and reworked the SGD optimizer.
Added automatic reshaping for module->forward.

Project: http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/commit/c1590296
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/tree/c1590296
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/diff/c1590296

Branch: refs/heads/master
Commit: c15902967bfee090a2596410dd346654dca4d145
Parents: de5b0fe
Author: Sergey Kolychev <se...@gmail.com>
Authored: Tue Jul 4 19:41:12 2017 -0700
Committer: Eric Junyuan Xie <pi...@users.noreply.github.com>
Committed: Tue Jul 4 19:41:12 2017 -0700

----------------------------------------------------------------------
 perl-package/AI-MXNet/Changes                   |   3 +
 perl-package/AI-MXNet/MANIFEST                  |   1 +
 perl-package/AI-MXNet/META.json                 |   4 +-
 perl-package/AI-MXNet/META.yml                  |   4 +-
 perl-package/AI-MXNet/Makefile.PL               |   6 +-
 perl-package/AI-MXNet/README                    |   2 +-
 perl-package/AI-MXNet/lib/AI/MXNet.pm           |   3 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Base.pm      |   4 +-
 perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm  |  89 ++++++
 .../AI-MXNet/lib/AI/MXNet/Executor/Group.pm     |   8 +-
 .../AI-MXNet/lib/AI/MXNet/Initializer.pm        |  43 ++-
 perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm   |  18 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Module.pm    |  88 +++++-
 .../AI-MXNet/lib/AI/MXNet/Module/Base.pm        |  23 +-
 .../AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm   |  15 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm   |  33 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm | 298 +++++++++++++++++--
 perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm  |   5 +-
 perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm    |  14 +-
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm    |  30 ++
 perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm |  16 +-
 perl-package/AI-MXNet/t/test_module.t           | 165 +++++++++-
 perl-package/AI-MXNet/t/test_ndarray.t          |  19 +-
 perl-package/AI-MXNet/t/test_optimizers.t       | 161 +++++++---
 perl-package/AI-MXNetCAPI/Changes               |   3 +
 perl-package/AI-MXNetCAPI/META.json             |   2 +-
 perl-package/AI-MXNetCAPI/META.yml              |   2 +-
 perl-package/AI-MXNetCAPI/README                |   2 +-
 perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm   |   2 +-
 perl-package/AI-MXNetCAPI/mxnet.i               |  62 ++--
 perl-package/AI-MXNetCAPI/mxnet_typemaps.i      |   8 +-
 31 files changed, 966 insertions(+), 167 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/Changes
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/Changes b/perl-package/AI-MXNet/Changes
index f2663c0..5d5c5a2 100644
--- a/perl-package/AI-MXNet/Changes
+++ b/perl-package/AI-MXNet/Changes
@@ -1,5 +1,8 @@
 Revision history for Perl extension AI::MXNet
 
+1.0101  Sun Jul  2 17:16:01 PDT 2017
+        - reworked CachedOp, two new optimizers, auto module reshape, using strings to index the kvstore.
+
 1.01    Sat Jun 10 23:57:27 PDT 2017
         - sync with python.
 

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/MANIFEST
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/MANIFEST b/perl-package/AI-MXNet/MANIFEST
index 855aa0a..7a6d78b 100644
--- a/perl-package/AI-MXNet/MANIFEST
+++ b/perl-package/AI-MXNet/MANIFEST
@@ -32,6 +32,7 @@ t/test_executor.t
 t/test_infer_shape.t
 lib/AI/MXNet.pm
 lib/AI/MXNet/Random.pm
+lib/AI/MXNet/CachedOp.pm
 lib/AI/MXNet/Context.pm
 lib/AI/MXNet/Contrib/AutoGrad.pm
 lib/AI/MXNet/Contrib/Symbol.pm

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/META.json
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/META.json b/perl-package/AI-MXNet/META.json
index c2f7530..5454592 100644
--- a/perl-package/AI-MXNet/META.json
+++ b/perl-package/AI-MXNet/META.json
@@ -30,7 +30,7 @@
       },
       "runtime" : {
          "requires" : {
-            "AI::MXNetCAPI" : "1.01",
+            "AI::MXNetCAPI" : "1.0101",
             "AI::NNVMCAPI" : "1.01",
             "Function::Parameters" : "1.0705",
             "GraphViz" : "2.14",
@@ -43,5 +43,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "1.01"
+   "version" : "1.0101"
 }

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/META.yml
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/META.yml b/perl-package/AI-MXNet/META.yml
index 14d5dc3..8c09c96 100644
--- a/perl-package/AI-MXNet/META.yml
+++ b/perl-package/AI-MXNet/META.yml
@@ -17,10 +17,10 @@ no_index:
     - t
     - inc
 requires:
-  AI::MXNetCAPI: '1.01'
+  AI::MXNetCAPI: '1.0101'
   AI::NNVMCAPI: '1.01'
   Function::Parameters: '1.0705'
   GraphViz: '2.14'
   Mouse: v2.1.0
   PDL: '2.007'
-version: '1.01'
+version: '1.0101'

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/Makefile.PL
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/Makefile.PL b/perl-package/AI-MXNet/Makefile.PL
index fc5abc0..4f42af0 100644
--- a/perl-package/AI-MXNet/Makefile.PL
+++ b/perl-package/AI-MXNet/Makefile.PL
@@ -19,7 +19,7 @@ my %WriteMakefileArgs = (
   "LICENSE" => "apache_2_0",
   "NAME" => "AI::MXNet",
   "PREREQ_PM" => {
-    "AI::MXNetCAPI" => "1.01",
+    "AI::MXNetCAPI" => "1.0101",
     "AI::NNVMCAPI" => "1.01",
     "Function::Parameters" => "1.0705",
     "Mouse" => "2.1.0",
@@ -27,7 +27,7 @@ my %WriteMakefileArgs = (
     "GraphViz" => "2.14"
   },
   "TEST_REQUIRES" => {},
-  "VERSION" => "1.01",
+  "VERSION" => "1.0101",
   "test" => {
     "TESTS" => "t/*.t"
   }
@@ -35,7 +35,7 @@ my %WriteMakefileArgs = (
 
 
 my %FallbackPrereqs = (
-  "AI::MXNetCAPI" => "1.01",
+  "AI::MXNetCAPI" => "1.0101",
   "AI::NNVMCAPI" => "1.01",
   "Function::Parameters" => "1.0705",
   "Mouse" => "2.1.0",

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/README
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/README b/perl-package/AI-MXNet/README
index 85406f6..f275d08 100644
--- a/perl-package/AI-MXNet/README
+++ b/perl-package/AI-MXNet/README
@@ -1,5 +1,5 @@
 This archive contains the distribution AI-MXNet,
-version 1.01:
+version 1.0101:
 
   Perl interface to MXNet machine learning library
 

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet.pm b/perl-package/AI-MXNet/lib/AI/MXNet.pm
index 41bb1a1..54fb6b3 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet.pm
@@ -28,7 +28,8 @@ use AI::MXNet::RecordIO;
 use AI::MXNet::Image;
 use AI::MXNet::Contrib;
 use AI::MXNet::Contrib::AutoGrad;
-our $VERSION = '1.01';
+use AI::MXNet::CachedOp;
+our $VERSION = '1.0101';
 
 sub import
 {

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
index 93859f6..69f8e43 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Base.pm
@@ -3,8 +3,8 @@ use strict;
 use warnings;
 use PDL;
 use PDL::Types qw();
-use AI::MXNetCAPI 0.9506;
-use AI::NNVMCAPI 0.95;
+use AI::MXNetCAPI 1.0101;
+use AI::NNVMCAPI 1.01;
 use AI::MXNet::Types;
 use Time::HiRes;
 use Carp;

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm b/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
new file mode 100644
index 0000000..ede4826
--- /dev/null
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/CachedOp.pm
@@ -0,0 +1,89 @@
+package AI::MXNet::CachedOp;
+
+=head1 NAME
+
+    AI::MXNet::CachedOp - A wrapper around CachedOpHandle
+=cut
+
+use strict;
+use warnings;
+use AI::MXNet::Base;
+use Mouse;
+use overload '&{}' => sub { my $self = shift; sub { $self->call(@_) } };
+
+has 'handle'   => (is => 'ro', isa => 'CachedOpHandle', required => 1);
+around BUILDARGS => sub {
+    my $orig  = shift;
+    my $class = shift;
+    my ($sym) = @_;
+    my $handle = check_call(
+        AI::MXNetCAPI::CreateCachedOp(
+            $sym->handle
+        )
+    );
+    return $class->$orig(handle => $handle);
+};
+
+sub DEMOLISH
+{
+    check_call(AI::MXNetCAPI::FreeCachedOp(shift->handle));
+}
+
+sub call
+{
+    my $self = shift;
+    my @args;
+    my %kwargs;
+    if(blessed $_[0] and $_[0]->isa('AI::MXNet::NDArray'))
+    {
+        while(blessed $_[0] and $_[0]->isa('AI::MXNet::NDArray'))
+        {
+            push @args, shift(@_);
+        }
+        %kwargs = @_;
+    }
+    else
+    {
+        %kwargs = @_;
+    }
+    my $out = delete $kwargs{out};
+    if(%kwargs)
+    {
+        confess(
+            "AI::MXNet::CachedOp::call got unexpected keyword argument(s): ".
+            join(', ', keys %kwargs)
+        );
+    }
+    my $original_output;
+    if(defined $out)
+    {
+        $original_output = $out;
+        if(blessed($out))
+        {
+            $out = [$out];
+        }
+    }
+    else
+    {
+        $out = [];
+    }
+    my $output = check_call(
+        AI::MXNetCAPI::InvokeCachedOp(
+            $self->handle,
+            scalar(@args),
+            [map { $_->handle } @args],
+            [map { $_->handle } @$out]
+        )
+    );
+    return $original_output if defined $original_output;
+    if(@$output == 1)
+    {
+        return AI::MXNet::NDArray->new(handle => $output->[0]);
+    }
+    else
+    {
+        return [map { AI::MXNet::NDArray->new(handle => $_) } @$output];
+    }
+}
+
+1;
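
For context, a minimal usage sketch of the wrapper above (untested; mx->sym->exp
stands in for any symbol, and mx->nd->CachedOp is the shorthand added in
NDArray.pm later in this diff):

    use AI::MXNet qw(mx);

    my $data = mx->sym->Variable('data');
    my $op   = AI::MXNet::CachedOp->new(mx->sym->exp($data));
    my $x    = mx->nd->ones([2, 3]);
    # leading NDArray arguments are the op inputs
    my $y    = $op->($x);
    # results can also be written into a preallocated output
    my $out  = mx->nd->empty([2, 3]);
    $op->($x, out => $out);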

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
index 0ae2db0..35f1b57 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor/Group.pm
@@ -203,8 +203,8 @@ use List::Util qw(sum);
     shared_group : AI::MXNet::DataParallelExecutorGroup
         Default is undef. This is used in bucketing. When not undef, it should be a executor
         group corresponding to a different bucket. In other words, it will correspond to a different
-        symbol but with the same set of parameters (e.g. unrolled RNNs with different lengths).
-        In this case, many memory will be shared.
+        symbol with the same set of parameters (e.g. unrolled RNNs with different lengths).
+        In this case the memory regions of the parameters will be shared.
     logger : Logger
         Default is AI::MXNet::Logging->get_logger.
     fixed_param_names: Maybe[ArrayRef[Str]]
@@ -549,9 +549,9 @@ method reshape(
         A dictionary of name to AI::MXNet::NDArray auxiliary variable mapping.
 =cut
 
-method set_params(HashRef[AI::MXNet::NDArray] $arg_params, HashRef[AI::MXNet::NDArray] $aux_params)
+method set_params(HashRef[AI::MXNet::NDArray] $arg_params, HashRef[AI::MXNet::NDArray] $aux_params, Bool $allow_extra=0)
 {
-    $_->copy_params_from($arg_params, $aux_params) for @{ $self->_p->execs };
+    $_->copy_params_from($arg_params, $aux_params, $allow_extra) for @{ $self->_p->execs };
 }
 
 =head2 get_params

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
index c3eee24..e6beffb 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Initializer.pm
@@ -15,8 +15,8 @@ use AI::MXNet::Function::Parameters;
     attrs : hash ref of str to str
         attributes of this variable taken from AI::MXNet::Symbol->attr_dict
 =cut
-has 'name'   => (is => 'ro', isa => 'Str', required => 1);
-has 'attrs'  => (is => 'rw', isa => 'HashRef[Str]', lazy => 1, default => sub { +{} });
+has 'name'        => (is => 'ro', isa => 'Str', required => 1);
+has 'attrs'       => (is => 'rw', isa => 'HashRef[Str]', lazy => 1, default => sub { +{} });
 use overload '""' => sub { shift->name };
 around BUILDARGS => sub {
     my $orig  = shift;
@@ -42,6 +42,15 @@ use overload "&{}" => sub { my $self = shift; sub { $self->call(@_) } },
              },
              fallback => 1;
 has 'kwargs' => (is => 'rw', init_arg => undef, isa => 'HashRef');
+has '_verbose'    => (is => 'rw', isa => 'Bool', lazy => 1, default => 0);
+has '_print_func' => (is => 'rw', isa => 'CodeRef', lazy => 1,
+    default => sub {
+        return sub {
+            my $x = shift;
+            return ($x->norm/sqrt($x->size))->asscalar;
+        };
+    }
+);
 
 =head1 NAME
 
@@ -52,6 +61,34 @@ has 'kwargs' => (is => 'rw', init_arg => undef, isa => 'HashRef');
     Register an initializer class to the AI::MXNet::Initializer factory.
 =cut
 
+=head2 set_verbosity
+
+    Switch on/off verbose mode
+
+    Parameters
+    ----------
+    $verbose : bool
+        switch on/off verbose mode
+    $print_func : CodeRef
+        A function that computes statistics of initialized arrays.
+        Takes an AI::MXNet::NDArray and returns a scalar. Defaults to the
+        root mean square of the array: norm(x)/sqrt(size(x))
+=cut
+
+method set_verbosity(Bool $verbose=0, CodeRef $print_func=)
+{
+    $self->_verbose($verbose);
+    $self->_print_func($print_func) if defined $print_func;
+}
+
+method _verbose_print($desc, $init, $arr)
+{
+    if($self->_verbose and defined $self->_print_func)
+    {
+        AI::MXNet::Logging->info('Initialized %s as %s: %s', $desc, $init, $self->_print_func->($arr));
+    }
+}
+
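
A sketch of the new verbose mode (assuming the stock AI::MXNet::Xavier
initializer; any registered initializer works the same way):

    my $init = AI::MXNet::Xavier->new(magnitude => 2);
    # log a statistic for every initialized array, using the default
    # print function (norm(x)/sqrt(size(x)))
    $init->set_verbosity(1);
    # or supply a custom statistic
    $init->set_verbosity(1, sub { my ($x) = @_; $x->abs->max->asscalar });
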
 my %init_registry;
 method get_init_registry()
 {
@@ -99,6 +136,7 @@ method call(Str|AI::MXNet::InitDesc $desc, AI::MXNet::NDArray $arr)
     {
       my ($klass, $kwargs) = @{ decode_json($init) };
       $self->get_init_registry->{ lc $klass }->new(%{ $kwargs })->_init_weight("$desc", $arr);
+      $self->_verbose_print($desc, $init, $arr);
     }
     else
     {
@@ -107,6 +145,7 @@ method call(Str|AI::MXNet::InitDesc $desc, AI::MXNet::NDArray $arr)
         {
             my $method = "_init_$1";
             $self->$method($desc, $arr);
+            $self->_verbose_print($desc, $1, $arr);
         }
         else
         {

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm b/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
index 9f36ceb..465cfd6 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/KVStore.pm
@@ -36,7 +36,7 @@ sub DEMOLISH
 
     Parameters
     ----------
-    key : int or an array ref of int
+    key : str or an array ref of str
         The keys.
     value : NDArray or an array ref of NDArray objects
         The values.
@@ -59,13 +59,13 @@ sub DEMOLISH
 =cut
 
 method init(
-    Int|ArrayRef[Int] $key,
+    Str|ArrayRef[Str] $key,
     AI::MXNet::NDArray|ArrayRef[AI::MXNet::NDArray]|ArrayRef[ArrayRef[AI::MXNet::NDArray]] $value
 )
 {
     my ($keys, $vals) = _key_value($key, $value);
     check_call(
-        AI::MXNetCAPI::KVStoreInit(
+        AI::MXNetCAPI::KVStoreInitEx(
             $self->handle, scalar(@{ $keys }), $keys, $vals
         )
     );
@@ -83,7 +83,7 @@ method init(
 
     Parameters
     ----------
-    key : int or array ref of int
+    key : str or array ref of str
     value : NDArray or array ref of NDArray or array ref of array refs of NDArray
     priority : int, optional
         The priority of the push operation.
@@ -127,14 +127,14 @@ method init(
 =cut
 
 method push(
-    Int|ArrayRef[Int] $key,
+    Str|ArrayRef[Str] $key,
     AI::MXNet::NDArray|ArrayRef[AI::MXNet::NDArray]|ArrayRef[ArrayRef[AI::MXNet::NDArray]] $value,
     Int :$priority=0
 )
 {
     my ($keys, $vals) = _key_value($key, $value);
     check_call(
-        AI::MXNetCAPI::KVStorePush(
+        AI::MXNetCAPI::KVStorePushEx(
             $self->handle, scalar(@{ $keys }), $keys, $vals, $priority
         )
     );
@@ -154,7 +154,7 @@ method push(
 
     Parameters
     ----------
-    key : int or array ref of int
+    key : str or array ref of str
         Keys
     out: NDArray or array ref of NDArray or array ref of array refs of NDArray
         According values
@@ -197,14 +197,14 @@ method push(
 =cut
 
 method pull(
-    Int|ArrayRef[Int] $key,
+    Str|ArrayRef[Str] $key,
     AI::MXNet::NDArray|ArrayRef[AI::MXNet::NDArray]|ArrayRef[ArrayRef[AI::MXNet::NDArray]] :$out,
     Int :$priority=0
 )
 {
     my ($keys, $vals) = _key_value($key, $out);
     check_call(
-        AI::MXNetCAPI::KVStorePull(
+        AI::MXNetCAPI::KVStorePullEx(
             $self->handle, scalar(@{ $keys }), $keys, $vals, $priority
         )
     );
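
With these changes the Perl API calls the string-keyed C API (the *Ex
functions). A minimal sketch of the new usage, assuming the usual
AI::MXNet::KVStore->create constructor:

    my $kv = AI::MXNet::KVStore->create('local');
    $kv->init('weight', mx->nd->zeros([2, 3]));
    $kv->push('weight', mx->nd->ones([2, 3]) * 8);
    my $out = mx->nd->empty([2, 3]);
    $kv->pull('weight', out => $out);   # $out now holds the aggregated pushed value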

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
index 2c5a2a5..ba70fd0 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module.pm
@@ -18,6 +18,7 @@ package AI::MXNet::Module;
 use AI::MXNet::Base;
 use AI::MXNet::Function::Parameters;
 use List::Util qw(max);
+use Data::Dumper ();
 use Mouse;
 
 func _create_kvstore(
@@ -71,10 +72,11 @@ func _initialize_kvstore(
 {
     enumerate(sub{
         my ($idx, $param_on_devs) = @_;
-        $kvstore->init($idx, $arg_params->{ $param_names->[$idx] });
+        my $name = $param_names->[$idx];
+        $kvstore->init($name, $arg_params->{ $name });
         if($update_on_kvstore)
         {
-            $kvstore->pull($idx, out => $param_on_devs, priority => -$idx);
+            $kvstore->pull($name, out => $param_on_devs, priority => -$idx);
         }
     }, $param_arrays);
 }
@@ -82,7 +84,8 @@ func _initialize_kvstore(
 func _update_params_on_kvstore(
     ArrayRef[AI::MXNet::NDArray]|ArrayRef[ArrayRef[AI::MXNet::NDArray]] $param_arrays,
     ArrayRef[AI::MXNet::NDArray]|ArrayRef[ArrayRef[AI::MXNet::NDArray]] $grad_arrays,
-    AI::MXNet::KVStore           $kvstore
+    AI::MXNet::KVStore           $kvstore,
+    ArrayRef[Str]                $param_names
 )
 {
     enumerate(sub{
@@ -91,10 +94,11 @@ func _update_params_on_kvstore(
         {
             return;
         }
+        my $name = $param_names->[$index];
         # push gradient, priority is negative index
-        $kvstore->push($index, $grad_list, priority => -$index);
+        $kvstore->push($name, $grad_list, priority => -$index);
         # pull back the weights
-        $kvstore->pull($index, out => $arg_list, priority  => -$index);
+        $kvstore->pull($name, out => $arg_list, priority  => -$index);
     }, $param_arrays, $grad_arrays);
 }
 
@@ -103,7 +107,8 @@ func _update_params(
     ArrayRef[ArrayRef[AI::MXNet::NDArray]] $grad_arrays,
     AI::MXNet::Updater                     $updater,
     Int                                    $num_device,
-    Maybe[AI::MXNet::KVStore]              $kvstore=
+    Maybe[AI::MXNet::KVStore]              $kvstore=,
+    Maybe[ArrayRef[Str]]                   $param_names=
 )
 {
     enumerate(sub{
@@ -114,16 +119,17 @@ func _update_params(
         }
         if($kvstore)
         {
+            my $name = $param_names->[$index];
             # push gradient, priority is negative index
-            $kvstore->push($index, $grad_list, priority => -$index);
+            $kvstore->push($name, $grad_list, priority => -$index);
             # pull back the sum gradients, to the same locations.
-            $kvstore->pull($index, out => $grad_list, priority => -$index);
+            $kvstore->pull($name, out => $grad_list, priority => -$index);
         }
         enumerate(sub {
             my ($k, $w, $g) = @_;
             # faked an index here, to make optimizer create diff
             # state for the same index but on diff devs, TODO(mli)
-            # use a better solution latter
+            # use a better solution later
             &{$updater}($index*$num_device+$k, $g, $w);
         }, $arg_list, $grad_list);
     }, $param_arrays, $grad_arrays);
@@ -399,7 +405,8 @@ method init_params(
     Maybe[HashRef[AI::MXNet::NDArray]] :$arg_params=,
     Maybe[HashRef[AI::MXNet::NDArray]] :$aux_params=,
     Bool                               :$allow_missing=0,
-    Bool                               :$force_init=0
+    Bool                               :$force_init=0,
+    Bool                               :$allow_extra=0
 )
 {
     if($self->params_initialized and not $force_init)
@@ -467,21 +474,23 @@ method init_params(
     $self->_p->_params_dirty(0);
 
     # copy the initialized parameters to devices
-    $self->_p->_exec_group->set_params($self->_p->_arg_params, $self->_p->_aux_params);
+    $self->_p->_exec_group->set_params($self->_p->_arg_params, $self->_p->_aux_params, $allow_extra);
 }
 
 method set_params(
     HashRef[AI::MXNet::NDArray]  $arg_params,
     HashRef[AI::MXNet::NDArray]  $aux_params,
     Bool                        :$allow_missing=0,
-    Bool                        :$force_init=1
+    Bool                        :$force_init=1,
+    Bool                        :$allow_extra=0
 )
 {
     if(not $allow_missing)
     {
         $self->init_params(
             arg_params    => $arg_params,    aux_params => $aux_params,
-            allow_missing => $allow_missing, force_init => $force_init
+            allow_missing => $allow_missing, force_init => $force_init,
+            allow_extra   => $allow_extra
         );
         return;
     }
@@ -494,7 +503,7 @@ method set_params(
         );
         return;
     }
-    $self->_p->_exec_group->set_params($arg_params, $aux_params);
+    $self->_p->_exec_group->set_params($arg_params, $aux_params, $allow_extra);
     $self->_p->_params_dirty(1);
     $self->params_initialized(1);
 }
@@ -770,6 +779,51 @@ method forward(
 )
 {
     assert($self->binded and $self->params_initialized);
+    # If starting to do the inference, force rebind the module.
+    if($self->label_shapes and not $data_batch->label)
+    {
+        confess(
+            "If you are trying to do inference, rebind module ".
+            "with 'force_rebind=True' and 'for_training=False'"
+        );
+    }
+
+    my @curr_data_shapes = map { $_->shape } @{ $self->data_shapes };
+    my @new_data_shapes  = map { $_->shape } @{ $data_batch->data };
+    if(Data::Dumper->Dump(\@curr_data_shapes) ne Data::Dumper->Dump(\@new_data_shapes))
+    {
+        my $new_dshape;
+        if($data_batch->can('provide_data') and $data_batch->provide_data)
+        {
+            $new_dshape = $data_batch->provide_data;
+        }
+        else
+        {
+            $new_dshape = [];
+            zip(sub {
+                my ($i, $shape) = @_;
+                push @{ $new_dshape }, AI::MXNet::DataDesc->new(
+                    $i->name, $shape, $i->dtype, $i->layout
+                );
+            }, $self->data_shapes, \@new_data_shapes);
+        }
+        my $new_lshape;
+        if($data_batch->can('provide_label') and $data_batch->provide_label)
+        {
+            $new_lshape = $data_batch->provide_label;
+        }
+        elsif($data_batch->can('label') and $data_batch->label)
+        {
+            $new_lshape = [];
+            zip(sub {
+                my ($i, $j) = @_;
+                push @{ $new_lshape }, AI::MXNet::DataDesc->new(
+                    $i->name, $j->shape, $i->dtype, $i->layout
+                );
+            }, $self->label_shapes, $data_batch->label);
+        }
+        $self->reshape(data_shapes => $new_dshape, label_shapes => $new_lshape);
+    }
     $self->_p->_exec_group->forward($data_batch, $is_train);
 }
 
@@ -788,7 +842,8 @@ method update()
         _update_params_on_kvstore(
             $self->_p->_exec_group->_p->param_arrays,
             $self->_p->_exec_group->_p->grad_arrays,
-            $self->_p->_kvstore
+            $self->_p->_kvstore,
+            $self->_p->_exec_group->param_names
         );
     }
     else
@@ -798,7 +853,8 @@ method update()
             $self->_p->_exec_group->_p->grad_arrays,
             $self->_p->_updater,
             scalar(@{ $self->_p->_context}),
-            $self->_p->_kvstore
+            $self->_p->_kvstore,
+            $self->_p->_exec_group->param_names
         );
     }
 }
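
The practical effect of the forward() change above, as a sketch ($mod is
assumed to be a module already bound with batch size 10, as in the tests
further below):

    my $batch = mx->io->DataBatch(
        data  => [mx->nd->ones([3, 3, 64, 64])],   # batch size 3, not 10
        label => [mx->nd->ones([3])],
    );
    # the module notices the shape change and reshapes itself;
    # no explicit $mod->reshape(...) call is needed
    $mod->forward($batch);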

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
index 44df735..293696d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Base.pm
@@ -677,6 +677,10 @@ method get_params() { confess("NotImplemented") }
         called to fill those missing params.
     :$force_init=0 : Bool
         If true, will force re-initialize even if already initialized.
+    :$allow_extra=0 : Bool, optional
+        Whether to allow extra parameters that are not needed by the symbol.
+        If true, no error is thrown when arg_params or aux_params
+        contain extra parameters that the executor does not need.
 =cut
 
 method init_params(
@@ -684,7 +688,8 @@ method init_params(
     Maybe[HashRef[AI::MXNet::NDArray]] :$arg_params=,
     Maybe[HashRef[AI::MXNet::NDArray]] :$aux_params=,
     Bool                               :$allow_missing=0,
-    Bool                               :$force_init=0
+    Bool                               :$force_init=0,
+    Bool                               :$allow_extra=0
 )
 {
     confess("NotImplemented");
@@ -705,13 +710,18 @@ method init_params(
         called to fill those missing params.
     :$force_init=0 : Bool
         If true, will force re-initialize even if already initialized.
+    :$allow_extra=0 : Bool
+        Whether to allow extra parameters that are not needed by the symbol.
+        If true, no error is thrown when arg_params or aux_params
+        contain extra parameters that the executor does not need.
 =cut
 
 method set_params(
     Maybe[HashRef[AI::MXNet::NDArray]]  $arg_params=,
     Maybe[HashRef[AI::MXNet::NDArray]]  $aux_params=,
     Bool                               :$allow_missing=0,
-    Bool                               :$force_init=0
+    Bool                               :$force_init=0,
+    Bool                               :$allow_extra=0
 )
 {
     $self->init_params(
@@ -719,7 +729,8 @@ method set_params(
         arg_params    => $arg_params,
         aux_params    => $aux_params,
         allow_missing => $allow_missing,
-        force_init    => $force_init
+        force_init    => $force_init,
+        allow_extra   => $allow_extra
     );
 }
 
@@ -865,7 +876,11 @@ method prepare(AI::MXNet::DataBatch $data_batch){}
 
 =head2 forward
 
-    Forward computation.
+    Forward computation. It supports data batches with different shapes, such as
+    different batch sizes or different image sizes.
+    If reshaping the data batch requires modifying the symbol or the module, for
+    example changing the image layout ordering or switching from training to
+    prediction, the module must be rebound explicitly.
 
     Parameters
     ----------
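
When the change goes beyond shapes, forward() refuses to guess and the module
is rebound explicitly. A sketch of the switch to prediction mentioned above
($pred_iter is a hypothetical prediction data iterator):

    $mod->bind(
        data_shapes  => $pred_iter->provide_data,
        label_shapes => undef,
        for_training => 0,
        force_rebind => 1,
    );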

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
index 30bdc43..af768f0 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Module/Bucketing.pm
@@ -210,14 +210,16 @@ method set_params(
     HashRef[AI::MXNet::NDArray] $arg_params,
     HashRef[AI::MXNet::NDArray] $aux_params,
     Bool                        $allow_missing=0,
-    Bool                        $force_init=1
+    Bool                        $force_init=1,
+    Bool                        $allow_extra=0
 )
 {
     if(not $allow_missing)
     {
         $self->init_params(
             arg_params    => $arg_params,    aux_params => $aux_params,
-            allow_missing => $allow_missing, force_init => $force_init
+            allow_missing => $allow_missing, force_init => $force_init,
+            allow_extra   => $allow_extra
         );
        return;
     }
@@ -232,7 +234,8 @@ method set_params(
     $self->_curr_module->set_params(
         $arg_params, $aux_params,
         allow_missing => $allow_missing,
-        force_init    => $force_init
+        force_init    => $force_init,
+        allow_extra   => $allow_extra
     );
     # because we didn't update self._arg_params, they are dirty now.
     $self->_params_dirty(1);
@@ -244,7 +247,8 @@ method init_params(
     Maybe[HashRef[AI::MXNet::NDArray]] :$arg_params=,
     Maybe[HashRef[AI::MXNet::NDArray]] :$aux_params=,
     Bool                               :$allow_missing=0,
-    Bool                               :$force_init=0
+    Bool                               :$force_init=0,
+    Bool                               :$allow_extra=0
 )
 {
     return if($self->params_initialized and not $force_init);
@@ -254,7 +258,8 @@ method init_params(
         arg_params    => $arg_params,
         aux_params    => $aux_params,
         allow_missing => $allow_missing,
-        force_init    => $force_init
+        force_init    => $force_init,
+        allow_extra   => $allow_extra
     );
     $self->_params_dirty(0);
     $self->params_initialized(1);

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
index 53579b2..edeb9b1 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
@@ -12,7 +12,7 @@ use AI::MXNet::NDArray::Slice;
 use AI::MXNet::Context;
 use Mouse;
 use AI::MXNet::Function::Parameters;
-use overload 
+use overload
     '""' => \&stringify,
     '+'  => \&add,
     '+=' => \&iadd,
@@ -22,6 +22,8 @@ use overload
     '*=' => \&imultiply,
     '/'  => \&divide,
     '/=' => \&idivide,
+    '%'  => \&modulo,
+    '%=' => \&imodulo,
     '**' => \&power,
     '==' => \&equal,
     '!=' => \&not_equal,
@@ -864,6 +866,24 @@ method true_divide(AI::MXNet::NDArray|Num $other, $reverse=)
     return $self->divide($other, $reverse);
 }
 
+method modulo(AI::MXNet::NDArray|Num $other, $reverse=)
+{
+    return _ufunc_helper(
+        $self,
+        $other,
+        qw/broadcast_mod _mod_scalar _rmod_scalar/,
+        $reverse
+    );
+}
+
+method imodulo(AI::MXNet::NDArray|Num $other, $reverse=)
+{
+    confess('trying to modulo to a readonly NDArray') unless $self->writable;
+    return ref $other
+        ? __PACKAGE__->broadcast_mod($self, $other, { out => $self })
+        : __PACKAGE__->_mod_scalar($self, $other, { out => $self })
+}
+
 =head2 empty
 
     Creates an empty uninitialized NDArray, with the specified shape.
@@ -998,7 +1018,7 @@ method full(
 
     Parameters
     ----------
-    $source_array : PDL, PDL::Matrix, Array ref in PDL::pdl format
+    $source_array : AI::MXNet::NDArray PDL, PDL::Matrix, Array ref in PDL::pdl format
         Source data to create NDArray from.
 
     :$ctx : AI::MXNet::Context, optional
@@ -1013,8 +1033,14 @@ method full(
         The created NDArray.
 =cut
 
-method array(PDL|PDL::Matrix|ArrayRef $source_array, AI::MXNet::Context :$ctx=AI::MXNet::Context->current_ctx, Dtype :$dtype='float32')
+method array(PDL|PDL::Matrix|ArrayRef|AI::MXNet::NDArray $source_array, AI::MXNet::Context :$ctx=AI::MXNet::Context->current_ctx, Dtype :$dtype='float32')
 {
+    if(blessed $source_array and $source_array->isa('AI::MXNet::NDArray'))
+    {
+        my $arr = __PACKAGE__->empty($source_array->shape, ctx => $ctx, dtype => $dtype);
+        $arr .= $source_array;
+        return $arr;
+    }
     my $pdl_type = PDL::Type->new(DTYPE_MX_TO_PDL->{ $dtype });
     if(not blessed($source_array))
     {
@@ -1372,6 +1398,7 @@ method backward(Maybe[AI::MXNet::NDArray] $out_grad=, Bool $retain_graph=0)
     )
 }
 
+method CachedOp(@args) { AI::MXNet::CachedOp->new(@args) }
 
 my $lvalue_methods = join "\n", map {"use attributes 'AI::MXNet::NDArray', \\&AI::MXNet::NDArray::$_, 'lvalue';"}
 qw/at slice aspdl asmpdl reshape copy sever T astype as_in_context copyto empty zero ones full
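
The two user-visible NDArray additions in one sketch (the '%' overloads and
array() accepting another NDArray):

    my $a = mx->nd->array([[1, 2, 3], [4, 5, 6]]);
    my $b = $a % 2;     # elementwise modulo via broadcast_mod
    $a   %= 3;          # in-place modulo
    # array() now also copies (and optionally casts) an existing NDArray
    my $c = mx->nd->array($a, dtype => 'float16');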

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
index 8b60db6..08b9565 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Optimizer.pm
@@ -258,8 +258,15 @@ method _get_wd(Index $index)
     clip_gradient : float, optional
         clip gradient in range [-clip_gradient, clip_gradient]
 
-    param_idx2name : dict of string/int to float, optional
+    param_idx2name : hash ref of int to str, optional
        a mapping of parameter indices to names; weight decay is treated
        specially for parameters whose names end with bias, gamma, or beta
+
+    multi_precision: bool, optional
+        Flag to control the internal precision of the optimizer.
+        False results in using the same precision as the weights (default),
+        True makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
 =cut
 
 package AI::MXNet::SGD;
@@ -268,6 +275,7 @@ extends 'AI::MXNet::Optimizer';
 
 has 'kwargs'   => (is => "rw", isa => "HashRef[Num]");
 has 'momentum' => (is => "rw", isa => "Num", default => 0);
+has 'multi_precision' => (is => "ro", isa => "Bool", default => 0);
 
 sub BUILD
 {
@@ -285,52 +293,79 @@ sub BUILD
 
 method create_state(Index $index, AI::MXNet::NDArray $weight)
 {
-    if($self->momentum == 0)
+    my $momentum;
+    my $weight_master_copy;
+    if($self->multi_precision and $weight->dtype eq 'float16')
     {
-        return undef;
+        $weight_master_copy = AI::MXNet::NDArray->array($weight, ctx => $weight->context, dtype => 'float32');
+        if($self->momentum != 0)
+        {
+            $momentum = AI::MXNet::NDArray->zeros($weight->shape, ctx => $weight->context, dtype => 'float32');
+        }
+        return [$momentum, $weight_master_copy];
     }
-    else
+    if($weight->dtype eq 'float16' and not $self->multi_precision)
     {
-        return AI::MXNet::NDArray->zeros(
-            $weight->shape, ctx => $weight->context, dtype => $weight->dtype
+        AI::MXNet::Logging->warning(
+            "Accumulating with float16 in optimizer can lead to ".
+            "poor accuracy or slow convergence. ".
+            "Consider using multi_precision=True option of the ".
+            "SGD optimizer"
         );
     }
+    if($self->momentum != 0)
+    {
+        $momentum = AI::MXNet::NDArray->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype);
+    }
+    return $momentum;
 }
 
 method update(
     Index                     $index,
     AI::MXNet::NDArray        $weight,
     AI::MXNet::NDArray        $grad,
-    Maybe[AI::MXNet::NDArray] $state
+    Maybe[AI::MXNet::NDArray|ArrayRef[Maybe[AI::MXNet::NDArray]]] $state
 )
 {
     my $lr = $self->_get_lr($index);
     my $wd = $self->_get_wd($index);
     $self->_update_count($index);
-    if($state)
+    my $kwargs = {
+        out => $weight,
+        lr  => $lr,
+        wd  => $wd,
+        %{ $self->kwargs }
+    };
+    my $use_multi_precision = ref($state) eq 'ARRAY';
+    if(not $use_multi_precision)
     {
-        AI::MXNet::NDArray->sgd_mom_update(
-            $weight, $grad, $state,
-            {
-                out => $weight,
-                lr  => $lr,
-                wd  => $wd,
-                %{ $self->kwargs }
-            }
-        );
+        if(defined $state)
+        {
+            AI::MXNet::NDArray->sgd_mom_update(
+                $weight, $grad, $state, $kwargs
+            );
+        }
+        else
+        {
+            AI::MXNet::NDArray->sgd_update(
+                $weight, $grad, $kwargs
+            );
+        }
     }
     else
     {
-        AI::MXNet::NDArray->sgd_update(
-            $weight,
-            $grad,
-            {
-                out => $weight,
-                lr  => $lr,
-                wd  => $wd,
-                %{ $self->kwargs }
-            }
-        );
+        if(defined $state->[0])
+        {
+            AI::MXNet::NDArray->mp_sgd_mom_update(
+                $weight, $grad, $state->[0], $state->[1], $kwargs
+            );
+        }
+        else
+        {
+            AI::MXNet::NDArray->mp_sgd_update(
+                $weight, $grad, $state->[1], $kwargs
+            );
+        }
     }
 }
 
@@ -1081,6 +1116,184 @@ method update(
                (($self->beta + $n->sqrt) / $lr + $wd) * ($dn->abs > $self->lamda1);
 }
 
+__PACKAGE__->register;
+
+package AI::MXNet::Adamax;
+
+=head1 NAME
+
+    AI::MXNet::Adamax
+=cut
+
+=head1 DESCRIPTION
+
+    AdaMax is a variant of Adam based on the infinity norm,
+    described at http://arxiv.org/abs/1412.6980 (Section 7).
+
+    This optimizer accepts the following parameters in addition to those accepted
+    by AI::MXNet::Optimizer.
+
+    Parameters
+    ----------
+    beta1 : float, optional
+        Exponential decay rate for the first moment estimates.
+    beta2 : float, optional
+        Exponential decay rate for the second moment estimates.
+=cut
+
+use Mouse;
+extends 'AI::MXNet::Optimizer';
+has '+learning_rate' => (default => 0.002);
+has 'beta1'          => (is => "ro", isa => "Num",  default => 0.9);
+has 'beta2'          => (is => "ro", isa => "Num",  default => 0.999);
+
+method create_state(Index $index, AI::MXNet::NDArray $weight)
+{
+    return [
+            AI::MXNet::NDArray->zeros(
+                $weight->shape,
+                ctx => $weight->context,
+                dtype => $weight->dtype
+            ),  # mean
+            AI::MXNet::NDArray->zeros(
+                $weight->shape,
+                ctx => $weight->context,
+                dtype => $weight->dtype
+            )   # variance
+    ];
+}
+
+method update(
+    Index $index,
+    AI::MXNet::NDArray $weight,
+    AI::MXNet::NDArray $grad,
+    ArrayRef[AI::MXNet::NDArray] $state
+)
+{
+    my $wd = $self->_get_wd($index);
+    my $lr = $self->_get_lr($index);
+    $self->_update_count($index);
+    my $t = $self->_index_update_count->{$index};
+    $lr /= (1 - $self->beta1**$t);
+
+    $grad = $grad * $self->rescale_grad + $wd * $weight;
+    if($self->clip_gradient)
+    {
+        $grad = AI::MXNet::NDArray->clip(
+            $grad,
+            -$self->clip_gradient,
+             $self->clip_gradient
+        );
+    }
+
+    # update m_t and u_t
+    my($m_t, $u_t) = @{ $state };
+    $m_t .= $self->beta1 * $m_t + (1 - $self->beta1) * $grad;
+    $u_t .= AI::MXNet::NDArray->maximum($self->beta2 * $u_t, $grad->abs);
+
+    # update weight
+    $weight -= $lr * $m_t / $u_t;
+}
+
+__PACKAGE__->register;
+
+package AI::MXNet::Nadam;
+
+=head1 NAME
+
+    AI::MXNet::Nadam
+=cut
+
+=head1 DESCRIPTION
+
+    The Nesterov Adam optimizer.
+
+    Much like Adam is essentially RMSprop with momentum,
+    Nadam is Adam with Nesterov momentum; see
+    http://cs229.stanford.edu/proj2015/054_report.pdf.
+
+    This optimizer accepts the following parameters in addition to those accepted
+    by AI::MXNet::Optimizer.
+
+    Parameters
+    ----------
+    beta1 : float, optional
+        Exponential decay rate for the first moment estimates.
+    beta2 : float, optional
+        Exponential decay rate for the second moment estimates.
+    epsilon : float, optional
+        Small value to avoid division by 0.
+    schedule_decay : float, optional
+        Exponential decay rate for the momentum schedule.
+=cut
+
+use Mouse;
+extends 'AI::MXNet::Optimizer';
+has '+learning_rate' => (default => 0.001);
+has 'beta1'          => (is => "ro", isa => "Num",  default => 0.9);
+has 'beta2'          => (is => "ro", isa => "Num",  default => 0.999);
+has 'epsilon'        => (is => "ro", isa => "Num",  default => 1e-8);
+has 'schedule_decay' => (is => "ro", isa => "Num",  default => 0.004);
+has 'm_schedule'     => (is => "rw", default => 1, init_arg => undef);
+
+method create_state(Index $index, AI::MXNet::NDArray $weight)
+{
+    return [
+            AI::MXNet::NDArray->zeros(
+                $weight->shape,
+                ctx => $weight->context,
+                dtype => $weight->dtype
+            ),  # mean
+            AI::MXNet::NDArray->zeros(
+                $weight->shape,
+                ctx => $weight->context,
+                dtype => $weight->dtype
+            )   # variance
+    ];
+}
+
+method update(
+    Index $index,
+    AI::MXNet::NDArray $weight,
+    AI::MXNet::NDArray $grad,
+    ArrayRef[AI::MXNet::NDArray] $state
+)
+{
+    my $wd = $self->_get_wd($index);
+    my $lr = $self->_get_lr($index);
+    $self->_update_count($index);
+    my $t = $self->_index_update_count->{$index};
+    $grad = $grad * $self->rescale_grad + $wd * $weight;
+    if($self->clip_gradient)
+    {
+        $grad = AI::MXNet::NDArray->clip(
+            $grad,
+            -$self->clip_gradient,
+             $self->clip_gradient
+        );
+    }
+    # warming momentum schedule
+    my $momentum_t    = $self->beta1 * (1 - 0.5 * (0.96**($t * $self->schedule_decay)));
+    my $momentum_t_1  = $self->beta1 * (1 - 0.5 * (0.96**(($t + 1) * $self->schedule_decay)));
+    $self->m_schedule = $self->m_schedule * $momentum_t;
+    my $m_schedule_next  = $self->m_schedule * $momentum_t_1;
+
+    # update m_t and v_t
+    my ($m_t, $v_t) = @{ $state };
+    $m_t .= $self->beta1 * $m_t + (1 - $self->beta1) * $grad;
+    $v_t .= $self->beta2 * $v_t + (1 - $self->beta2) * $grad * $grad;
+
+    my $grad_prime = $grad / (1 - $self->m_schedule);
+    my $m_t_prime  = $m_t  / (1 - $m_schedule_next);
+    my $v_t_prime  = $v_t  / (1 - $self->beta2**$t);
+    my $m_t_bar    = (1 - $momentum_t) * $grad_prime + $momentum_t_1 * $m_t_prime;
+
+    # update weight
+    $weight -= $lr * $m_t_bar / (sqrt($v_t_prime) + $self->epsilon);
+}
+
+__PACKAGE__->register;
+
 # updater for kvstore
 package AI::MXNet::Updater;
 use Mouse;
@@ -1088,22 +1301,44 @@ use Storable qw(thaw freeze);
 use overload "&{}" => sub { my $self = shift; sub { $self->call(@_) } },
              fallback => 1;
 
-has "optimizer" => (is => "rw", isa => "AI::MXNet::Optimizer");
-has "states"    => (is => "rw", isa => "HashRef", default => sub { +{} });
+has "optimizer"     => (is => "rw", isa => "AI::MXNet::Optimizer");
+has "states"        => (is => "rw", isa => "HashRef", default => sub { +{} });
+has "states_synced" => (is => "rw", isa => "HashRef", default => sub { +{} });
 
 method call(Index $index, AI::MXNet::NDArray $grad, AI::MXNet::NDArray $weight)
 {
     if(not exists $self->states->{ $index })
     {
         $self->states->{ $index } = $self->optimizer->create_state($index, $weight);
+        $self->states_synced->{ $index } = 1;
+    }
+    elsif(not $self->states_synced->{ $index })
+    {
+        $self->states->{ $index } = $self->sync_state_context($self->states->{ $index }, $weight->context);
+        $self->states_synced->{ $index } = 1;
     }
     $self->optimizer->update($index, $weight, $grad, $self->states->{ $index });
 }
 *slice = *call;
 
+method sync_state_context(Maybe[AI::MXNet::NDArray|ArrayRef[AI::MXNet::NDArray]] $state, AI::MXNet::Context $context)
+{
+    if(blessed $state)
+    {
+        return $state->as_in_context($context);
+    }
+    elsif(ref $state)
+    {
+        return [map { $self->sync_state_context($_, $context) } @{ $state }];
+    }
+    return $state;
+}
+
 method set_states($states)
 {
-    $self->states(thaw($states));
+    my $thawed_states = thaw($states);
+    $self->states($thawed_states);
+    %{ $self->states_synced } = map { $_ => 0 } keys %{ $thawed_states };
 }
 
 method get_states()
@@ -1113,10 +1348,9 @@ method get_states()
 
 package AI::MXNet::Optimizer;
 
-
 method get_updater(AI::MXNet::Optimizer $optimizer)
 {
     return AI::MXNet::Updater->new(optimizer => $optimizer);
 }
 
-1;
\ No newline at end of file
+1;
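
A sketch of the new optimizer surface (constructor arguments as defined above;
the multi_precision flag only has an effect for float16 weights):

    # SGD keeping a float32 master copy of float16 weights
    my $sgd = AI::MXNet::SGD->new(
        learning_rate   => 0.1,
        momentum        => 0.9,
        multi_precision => 1,
    );
    # the two new optimizers
    my $adamax = AI::MXNet::Adamax->new(learning_rate => 0.002);
    my $nadam  = AI::MXNet::Nadam->new(schedule_decay => 0.004);
    # kvstore-style updater, as before
    my $updater = AI::MXNet::Optimizer->get_updater($sgd);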

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
index 8996849..c7523aa 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/Cell.pm
@@ -981,8 +981,8 @@ method unroll(
         name          => $self->_prefix.'rnn',
         %states
     );
-
     my $outputs;
+    my %attr = (__layout__ => 'LNC');
     if(not $self->_get_next_state)
     {
         ($outputs, $states) = ($rnn, []);
@@ -990,11 +990,14 @@ method unroll(
     elsif($self->_mode eq 'lstm')
     {
         my @rnn = @{ $rnn };
+        $rnn[1]->_set_attr(%attr);
+        $rnn[2]->_set_attr(%attr);
         ($outputs, $states) = ($rnn[0], [$rnn[1], $rnn[2]]);
     }
     else
     {
         my @rnn = @{ $rnn };
+        $rnn[1]->_set_attr(%attr);
         ($outputs, $states) = ($rnn[0], [$rnn[1]]);
     }
     if(defined $merge_outputs and not $merge_outputs)

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
index 065dade..731f776 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/RNN/IO.pm
@@ -137,7 +137,7 @@ has 'invalid_label' => (is => 'ro', isa => 'Int',   default => -1);
 has 'data_name'     => (is => 'ro', isa => 'Str',   default => 'data');
 has 'label_name'    => (is => 'ro', isa => 'Str',   default => 'softmax_label');
 has 'dtype'         => (is => 'ro', isa => 'Dtype', default => 'float32');
-has 'layout'        => (is => 'ro', isa => 'Str',   default => 'NTC');
+has 'layout'        => (is => 'ro', isa => 'Str',   default => 'NT');
 has 'buckets'       => (is => 'rw', isa => 'Maybe[ArrayRef[Int]]');
 has [qw/data nddata ndlabel
         major_axis default_bucket_key
@@ -204,14 +204,16 @@ sub BUILD
         AI::MXNet::DataDesc->new(
             name  => $self->data_name,
             shape => $shape,
-            dtype => $self->dtype
+            dtype => $self->dtype,
+            layout => $self->layout
         )
     ]);
     $self->provide_label([
         AI::MXNet::DataDesc->new(
             name  => $self->label_name,
             shape => $shape,
-            dtype => $self->dtype
+            dtype => $self->dtype,
+            layout => $self->layout
         )
     ]);
     $self->idx([]);
@@ -272,14 +274,16 @@ method next()
             AI::MXNet::DataDesc->new(
                 name  => $self->data_name,
                 shape => $data->shape,
-                dtype => $self->dtype
+                dtype => $self->dtype,
+                layout => $self->layout
             )
         ],
         provide_label => [
             AI::MXNet::DataDesc->new(
                 name  => $self->label_name,
                 shape => $label->shape,
-                dtype => $self->dtype
+                dtype => $self->dtype,
+                layout => $self->layout
             )
         ],
     );

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
index e22e418..8b14f4e 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
@@ -20,6 +20,7 @@ use overload
     '/'   => \&divide,
     '/='  => \&idivide,
     '**'  => \&power,
+    '%'   => \&mod,
     '=='  => \&equal,
     '!='  => \&not_equal,
     '>'   => \&greater,
@@ -169,6 +170,16 @@ method true_divide(AI::MXNet::Symbol|Num $other, $reverse=)
     return $self->divide($other, $reverse);
 }
 
+method mod(AI::MXNet::Symbol|Num $other, $reverse=)
+{
+    return _ufunc_helper(
+        $self,
+        $other,
+        qw/_Mod _ModScalar _RModScalar/,
+        $reverse
+    );
+}
+
 method maximum(AI::MXNet::Symbol|Num $other)
 {
     return _ufunc_helper(
@@ -429,6 +440,25 @@ method list_auxiliary_states()
 }
 
 
+=head2 list_inputs
+
+    Lists all arguments and auxiliary states of this Symbol.
+
+    Returns
+    -------
+    inputs : array ref of str
+        List of all inputs.
+
+    Examples
+    --------
+    >>> my $bn = mx->sym->BatchNorm(name=>'bn');
+    >>> $bn->list_inputs;
+=cut
+
+method list_inputs()
+{
+    return scalar(check_call(AI::NNVMCAPI::SymbolListInputNames($self->handle, 0)));
+}
+
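
Both Symbol additions in a short sketch:

    my $a = mx->sym->Variable('a');
    my $b = mx->sym->Variable('b');
    my $m = $a % $b;    # elementwise mod, maps to _Mod
    my $bn = mx->sym->BatchNorm(name => 'bn');
    # all arguments plus auxiliary states
    my $inputs = $bn->list_inputs;
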
 =head2 infer_type
 
         Infer the type of outputs and arguments of given known types of arguments.

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm b/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
index e6e3189..52050fa 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/TestUtils.pm
@@ -9,7 +9,7 @@ use Exporter;
 use base qw(Exporter);
 @AI::MXNet::TestUtils::EXPORT_OK = qw(same reldiff almost_equal GetMNIST_ubyte
                                       GetCifar10 pdl_maximum pdl_minimum mlp2 conv
-                                      check_consistency zip assert enumerate same_array);
+                                      check_consistency zip assert enumerate same_array dies_like);
 use constant default_numerical_threshold => 1e-6;
 =head1 NAME
 
@@ -385,4 +385,18 @@ func same_array(
     return same($array1->aspdl, $array2->aspdl);
 }
 
+func dies_like($code, $regexp)
+{
+    eval { $code->() };
+    if($@ =~ $regexp)
+    {
+        return 1;
+    }
+    else
+    {
+        warn $@;
+        return 0;
+    }
+}
+
 1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/t/test_module.t
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/t/test_module.t b/perl-package/AI-MXNet/t/test_module.t
index c6e3c1a..4d19a8e 100644
--- a/perl-package/AI-MXNet/t/test_module.t
+++ b/perl-package/AI-MXNet/t/test_module.t
@@ -1,9 +1,9 @@
 use strict;
 use warnings;
-use Test::More tests => 247;
+use Test::More tests => 257;
 use AI::MXNet qw(mx);
 use AI::MXNet::Base;
-use AI::MXNet::TestUtils qw(almost_equal enumerate same_array);
+use AI::MXNet::TestUtils qw(almost_equal enumerate same_array dies_like);
 use Data::Dumper;
 
 sub test_module_layout
@@ -451,6 +451,165 @@ sub test_executor_group
     );
 }
 
+sub test_module_set_params
+{
+    # data iter
+    mx->random->seed(11);
+    my $data = mx->nd->array([[0.05, .10]]);
+    my $label = mx->nd->array([[.01, 0.99]]);
+    my $train_data = mx->io->NDArrayIter(data => $data, label => $label, batch_size => 1);
+
+    # symbols
+    my $x = mx->symbol->Variable('data');
+    $x = mx->symbol->FullyConnected(name=>'fc_0', data=>$x, num_hidden=>2);
+    $x = mx->symbol->Activation(name=>"act_0", data=>$x, act_type=>'sigmoid');
+    $x = mx->symbol->FullyConnected(name=>'fc_1', data=>$x, num_hidden=>2);
+    $x = mx->symbol->Activation(name=>"act_1", data=>$x, act_type=>'sigmoid');
+    $x = mx->symbol->LinearRegressionOutput(data=>$x, name=>'softmax', grad_scale=>2);
+
+    # create module
+    my $mod = mx->mod->Module($x, context=>[mx->cpu()]);
+    $mod->bind(data_shapes => $train_data->provide_data, label_shapes=>$train_data->provide_label,
+             for_training=>1);
+
+    my $arg_params_correct = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]),
+                  fc_0_bias => mx->nd->array([.35, .35]),
+                  fc_1_weight =>  mx->nd->array([[.40, .45], [.50, .55]]),
+                  fc_1_bias  => mx->nd->array([.60, .60])};
+
+    my $arg_params_missing = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]),
+                  fc_0_bias  => mx->nd->array([.35, .35]),
+                  fc_1_weight => mx->nd->array([[.40, .45], [.50, .55]])};
+
+    my $arg_params_extra = {fc_0_weight => mx->nd->array([[.15, .20], [.25, .30]]),
+                  fc_0_bias  => mx->nd->array([.35, .35]),
+                  fc_1_weight=> mx->nd->array([[.40, .45], [.50, .55]]),
+                  fc_1_bias => mx->nd->array([.60, .60]),
+                  fc_2_weight => mx->nd->array([.60, .60])};
+
+    my $arg_params_missing_extra = {fc_3_weight => mx->nd->array([.60, .60])};
+
+    # test regular set_params
+    $mod->set_params($arg_params_correct, {}, force_init=>1);
+
+    # test allow missing
+    $mod->set_params($arg_params_missing, {}, allow_missing=>1, force_init=>1);
+    ok(dies_like(sub { $mod->set_params($arg_params_missing, {}, force_init=>1, allow_missing=>0); }, qr/fc_/));
+
+    # test allow extra
+    $mod->set_params($arg_params_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>1);
+    ok(dies_like(sub { $mod->set_params($arg_params_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>0); }, qr/fc_/));
+
+    # test allow missing + extra, this will throw a runtime error
+    ok(dies_like(sub { $mod->set_params($arg_params_missing_extra, {}, force_init=>1, allow_missing=>1, allow_extra=>0); }, qr/fc_/));
+}
+
+sub test_forward_reshape
+{
+    my $num_class = 10;
+    my $data1 = mx->sym->Variable('data1');
+    my $data2 = mx->sym->Variable('data2');
+    my $conv1 = mx->sym->Convolution(data=>$data1, kernel=>[2, 2], num_filter=>2, stride=>[2, 2]);
+    my $conv2 = mx->sym->Convolution(data=>$data2, kernel=>[3, 3], num_filter=>3, stride=>[1, 1]);
+    my $pooling1 = mx->sym->Pooling(data=>$conv1, kernel=>[2, 2], stride=>[1, 1], pool_type=>"avg");
+    my $pooling2 = mx->sym->Pooling(data=>$conv2, kernel=>[2, 2], stride=>[1, 1], pool_type=>"max");
+    my $flatten1 = mx->sym->flatten(data=>$pooling1);
+    my $flatten2 = mx->sym->flatten(data=>$pooling2);
+    my $sum = mx->sym->sum(data=>$flatten1, axis=>1) + mx->sym->sum(data=>$flatten2, axis=>1);
+    my $fc = mx->sym->FullyConnected(data=>$sum, num_hidden=>$num_class);
+    my $sym = mx->sym->SoftmaxOutput(data=>$fc, name=>'softmax');
+
+    my $dshape1 = [10, 3, 64, 64];
+    my $dshape2 = [10, 3, 32, 32];
+    my $lshape = [10];
+
+    my $mod = mx->mod->Module(symbol=>$sym, data_names=>['data1', 'data2'],
+                        label_names=>['softmax_label']);
+    $mod->bind(data_shapes=>[['data1', $dshape1], ['data2', $dshape2]],
+             label_shapes=>[['softmax_label', $lshape]]);
+    $mod->init_params();
+    $mod->init_optimizer(optimizer_params=>{learning_rate => 0.01});
+
+    # Train with original data shapes
+    my $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1),
+                                       mx->nd->random_uniform(5, 15, $dshape2)],
+                                 label=>[mx->nd->ones($lshape)]);
+    $mod->forward($data_batch);
+    is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]);
+    $mod->backward();
+    $mod->update();
+
+    # Train with different batch size
+    $dshape1 = [3, 3, 64, 64];
+    $dshape2 = [3, 3, 32, 32];
+    $lshape = [3];
+    $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1),
+                                       mx->nd->random_uniform(5, 15, $dshape2)],
+                                 label=>[mx->nd->ones($lshape)]);
+    $mod->forward($data_batch);
+    is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]);
+    $mod->backward();
+    $mod->update();
+
+    $dshape1 = [20, 3, 64, 64];
+    $dshape2 = [20, 3, 32, 32];
+    $lshape = [20];
+    $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(3, 5, $dshape1),
+                                       mx->nd->random_uniform(10, 25, $dshape2)],
+                                 label=>[mx->nd->ones($lshape)]);
+    $mod->forward($data_batch);
+    is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]);
+    $mod->backward();
+    $mod->update();
+
+    # Train with both a different batch size and different data shapes
+    $dshape1 = [20, 3, 120, 120];
+    $dshape2 = [20, 3, 32, 64];
+    $lshape = [20];
+    $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1),
+                                       mx->nd->random_uniform(5, 15, $dshape2)],
+                                 label=>[mx->nd->ones($lshape)]);
+    $mod->forward($data_batch);
+    is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]);
+    $mod->backward();
+    $mod->update();
+
+    $dshape1 = [5, 3, 28, 40];
+    $dshape2 = [5, 3, 24, 16];
+    $lshape = [5];
+    $data_batch = mx->io->DataBatch(data=>[mx->nd->random_uniform(0, 9, $dshape1),
+                                       mx->nd->random_uniform(15, 25, $dshape2)],
+                                 label=>[mx->nd->ones($lshape)]);
+    $mod->forward($data_batch);
+    is_deeply($mod->get_outputs->[0]->shape, [$lshape->[0], $num_class]);
+    $mod->backward();
+    $mod->update();
+
+    # Test score
+    my $dataset_shape1 = [30, 3, 30, 30];
+    my $dataset_shape2 = [30, 3, 20, 40];
+    my $labelset_shape = [30];
+
+    my $eval_dataiter = mx->io->NDArrayIter(data=>[mx->nd->random_uniform(0, 9, $dataset_shape1),
+                                            mx->nd->random_uniform(15, 25, $dataset_shape2)],
+                                      label=>[mx->nd->ones($labelset_shape)],
+                                      batch_size=>5);
+    ok(keys %{ $mod->score($eval_dataiter, 'acc') } == 1);
+
+    # Test prediction
+    $dshape1 = [1, 3, 30, 30];
+    $dshape2 = [1, 3, 20, 40];
+    $dataset_shape1 = [10, 3, 30, 30];
+    $dataset_shape2 = [10, 3, 20, 40];
+
+    my $pred_dataiter = mx->io->NDArrayIter(data=>[mx->nd->random_uniform(0, 9, $dataset_shape1),
+                                            mx->nd->random_uniform(15, 25, $dataset_shape2)]);
+    $mod->bind(data_shapes=>[['data1', $dshape1], ['data2', $dshape2]],
+             for_training=>0, force_rebind=>1);
+    is_deeply($mod->predict($pred_dataiter)->shape, [10, $num_class]);
+
+}
+
 test_module_input_grads();
 test_module_dtype();
 test_monitor();
@@ -460,3 +619,5 @@ test_module_states();
 test_module_reshape();
 test_save_load();
 test_executor_group();
+test_module_set_params();
+test_forward_reshape();
\ No newline at end of file
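
The test_forward_reshape() sub above exercises the commit's auto-reshape for
Module->forward: once bound, a module now accepts batches whose batch size or
data shapes differ from the bind-time shapes. A minimal sketch of the
behaviour, assuming a single-input network (layer sizes and shapes are
illustrative, not from the test):

    use AI::MXNet qw(mx);

    my $net = mx->sym->FullyConnected(data => mx->sym->Variable('data'),
                                      num_hidden => 10);
    $net = mx->sym->SoftmaxOutput(data => $net, name => 'softmax');
    my $mod = mx->mod->Module($net, context => [mx->cpu()]);
    $mod->bind(data_shapes  => [['data', [8, 20]]],
               label_shapes => [['softmax_label', [8]]]);
    $mod->init_params();

    # forward() now reshapes the module on the fly when the incoming
    # batch does not match the bind-time shapes; no explicit reshape().
    my $batch = mx->io->DataBatch(data  => [mx->nd->ones([4, 20])],
                                  label => [mx->nd->ones([4])]);
    $mod->forward($batch);    # batch size 4 instead of 8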

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/t/test_ndarray.t
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/t/test_ndarray.t b/perl-package/AI-MXNet/t/test_ndarray.t
index d4e1a4d..4faf464 100644
--- a/perl-package/AI-MXNet/t/test_ndarray.t
+++ b/perl-package/AI-MXNet/t/test_ndarray.t
@@ -2,7 +2,7 @@ use strict;
 use warnings;
 use AI::MXNet qw(mx);
 use AI::MXNet::TestUtils qw(almost_equal);
-use Test::More tests => 8;
+use Test::More tests => 10;
 
 sub test_ndarray_reshape
 {
@@ -51,6 +51,23 @@ sub test_output
     ok(almost_equal($out->aspdl, $ones->aspdl * 2));
 }
 
+sub test_cached
+{
+    my $sym = mx->sym->Convolution(kernel=>[3, 3], num_filter=>10) + 2;
+    my $op = mx->nd->CachedOp($sym);
+    my $data = mx->nd->ones([3, 4, 10, 10]);
+    my $weight = mx->nd->ones([10, 4, 3, 3]);
+    my $bias = mx->nd->ones([10]);
+    my $o1 = &{$op}($data, $weight, $bias);
+    $bias .= 2;
+    my $o2 = &{$op}($data, $weight, $bias);
+    ok(almost_equal($o2->aspdl, $o1->aspdl+1));
+    $o2 .= 0;
+    &{$op}($data, $weight, $bias, out=>$o2);
+    ok(almost_equal($o2->aspdl, $o1->aspdl+1));
+}
+
 test_ndarray_reshape();
 test_moveaxis();
 test_output();
+test_cached();
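
A note on the arithmetic in test_cached() above: the convolution bias enters
the output additively, so raising the all-ones bias to 2 increases every
output element by exactly 1, hence the expectation that $o2 equals $o1 + 1.
The cached operator is invoked as a code reference over NDArrays given in the
symbol's argument order; a minimal sketch with a simpler symbol (the exp
network is illustrative, not part of the test):

    use AI::MXNet qw(mx);

    my $sym = mx->sym->exp(mx->sym->Variable('x'));
    my $op  = mx->nd->CachedOp($sym);
    my $y   = &{$op}(mx->nd->zeros([2, 2]));     # exp(0) == 1 elementwise
    my $out = mx->nd->zeros([2, 2]);
    &{$op}(mx->nd->zeros([2, 2]), out => $out);  # reuse a preallocated output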

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNet/t/test_optimizers.t
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNet/t/test_optimizers.t b/perl-package/AI-MXNet/t/test_optimizers.t
index a92a788..52ff307 100644
--- a/perl-package/AI-MXNet/t/test_optimizers.t
+++ b/perl-package/AI-MXNet/t/test_optimizers.t
@@ -192,12 +192,31 @@ use Mouse;
 extends 'AI::MXNet::Optimizer';
 has '+learning_rate' => (default => 0.01);
 has 'momentum'       => (is => "ro", isa => "Num",  default => 0);
+has 'multi_precision' => (is => 'ro', isa => 'Bool', default => 0);
 
 # Create additional optimizer state: momentum
 method create_state(Index $index, AI::MXNet::NDArray $weight)
 {
-    return undef if $self->momentum == 0;
-    return mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype);
+    my $momentum;
+    my $weight_master_copy;
+    my $do_multi_precision = ($self->multi_precision and $weight->dtype eq 'float16');
+    if($do_multi_precision)
+    {
+        if($self->momentum != 0)
+        {
+            $momentum = mx->nd->zeros($weight->shape, ctx => $weight->context, dtype=>'float32');
+        }
+        $weight_master_copy = mx->nd->array($weight, ctx=>$weight->context, dtype=>'float32');
+        return [$momentum, $weight_master_copy];
+    }
+    else
+    {
+        if($self->momentum != 0)
+        {
+            $momentum = mx->nd->zeros($weight->shape, ctx => $weight->context, dtype => $weight->dtype);
+        }
+    }
+    return $momentum;
 }
 
 method update($index, $weight, $grad, $state)
@@ -205,48 +224,90 @@ method update($index, $weight, $grad, $state)
     my $lr = $self->_get_lr($index);
     my $wd = $self->_get_wd($index);
     $self->_update_count($index);
-    if($self->momentum == 0)
+    my $use_multi_precision = ref($state) eq 'ARRAY';
+
+    if(not $use_multi_precision)
     {
-        if(defined $self->clip_gradient)
+        if($self->momentum == 0)
         {
-            $weight .= ((1 - $lr*$wd)*$weight -
-                $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
-            );
+            if(defined $self->clip_gradient)
+            {
+                $weight .= ((1 - $lr*$wd)*$weight -
+                    $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
+                );
+            }
+            else
+            {
+                $weight .= (1 - $lr*$wd)*$weight - $lr*$self->rescale_grad*$grad;
+            }
         }
         else
         {
-            $weight .= (1 - $lr*$wd)*$weight - $lr*$self->rescale_grad*$grad;
+            my $mom = $state;
+            if(defined $self->clip_gradient)
+            {
+                $mom .= ($self->momentum*$mom - $lr*$wd*$weight -
+                    $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
+                );
+                $weight += $mom;
+            }
+            else
+            {
+                $mom .= $self->momentum*$mom - $lr*$wd*$weight - $lr*$self->rescale_grad*$grad;
+                $weight += $mom;
+            }
         }
     }
     else
     {
-        my $mom = $state;
-        if(defined $self->clip_gradient)
+        my $grad32 = mx->nd->array($grad, ctx=>$grad->context, dtype=>'float32');
+        my $mom = $state->[0];
+        my $weight32 = $state->[1];
+        if($self->momentum == 0)
         {
-            $mom .= ($self->momentum*$mom - $lr*$wd*$weight -
-                $lr * mx->nd->clip($grad*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
-            );
-            $weight += $mom;
+            if(defined $self->clip_gradient)
+            {
+                $weight32 .= ((1 - $lr*$wd)*$weight32 -
+                    $lr * mx->nd->clip($grad32*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
+                );
+            }
+            else
+            {
+                $weight32 .= (1 - $lr*$wd)*$weight32 - $lr*$self->rescale_grad*$grad32;
+            }
         }
         else
         {
-            $mom .= $self->momentum*$mom - $lr*$wd*$weight - $lr*$self->rescale_grad*$grad;
-            $weight += $mom;
+            if(defined $self->clip_gradient)
+            {
+                $mom .= ($self->momentum*$mom - $lr*$wd*$weight32 -
+                    $lr * mx->nd->clip($grad32*$self->rescale_grad, -$self->clip_gradient, $self->clip_gradient)
+                );
+                $weight32 += $mom;
+            }
+            else
+            {
+                $mom .= $self->momentum*$mom - $lr*$wd*$weight32 - $lr*$self->rescale_grad*$grad32;
+                $weight32 += $mom;
+            }
         }
+        my $tmp = $weight32->astype($weight->dtype);
+        $tmp->copyto($weight);
     }
 }
 
+
 package main;
-use Test::More tests => 190;
+use Test::More tests => 1314;
 use AI::MXNet::Base;
 use PDL::NiceSlice;
 use AI::MXNet::TestUtils qw(same reldiff almost_equal);
 use AI::MXNet::Function::Parameters;
 
-func compare_optimizer($opt1, $opt2, $shape)
+func compare_optimizer($opt1, $opt2, $shape, $dtype)
 {
-    my $w1 = mx->random->uniform({shape => $shape});
-    my $g1 = mx->random->uniform({shape => $shape});
+    my $w1 = mx->random->uniform({shape => $shape, dtype=>$dtype});
+    my $g1 = mx->random->uniform({shape => $shape, dtype=>$dtype});
 
     my $w2 = $w1->copyto(mx->cpu());
     my $g2 = $g1->copyto(mx->cpu());
@@ -256,7 +317,7 @@ func compare_optimizer($opt1, $opt2, $shape)
     zip(
         sub {
             my ($s1, $s2) = @_;
-            ok(same($s1->aspdl, $s2->aspdl))
+            ok(same($s1->aspdl, $s2->aspdl)) if defined $s1 and defined $s2;
         },
         ref $state1 eq 'ARRAY' ? $state1 : [$state1], ref $state2 eq 'ARRAY' ? $state2 : [$state2]
     ) if defined $state1 and defined $state2;
@@ -266,7 +327,7 @@ func compare_optimizer($opt1, $opt2, $shape)
     zip(
         sub {
             my ($s1, $s2) = @_;
-            ok(reldiff($s1->aspdl, $s2->aspdl) < 1e-5)
+            ok(reldiff($s1->aspdl, $s2->aspdl) < 1e-5) if defined $s1 and defined $s2;
         },
         ref $state1 eq 'ARRAY' ? $state1 : [$state1], ref $state2 eq 'ARRAY' ? $state2 : [$state2]
     ) if defined $state1 and defined $state2;
@@ -285,7 +346,7 @@ func test_adam()
               {'rescale_grad'=> 0.1});
     for my $kwarg (@kwargs)
     {
-        compare_optimizer($opt1->new(%$kwarg), $opt2->new(wd => 0.9, %$kwarg), $shape);
+        compare_optimizer($opt1->new(%$kwarg), $opt2->new(wd => 0.9, %$kwarg), $shape, 'float32');
     }
 }
 
@@ -324,7 +385,7 @@ func test_rms()
               {rescale_grad  => 0.8, wd => 0.05, centered => 1, clip_weights => 0.01});
     for my $kwarg (@kwargs)
     {
-        compare_optimizer($opt1->new(%$kwarg), $opt2->new(%$kwarg), $shape);
+        compare_optimizer($opt1->new(%$kwarg), $opt2->new(%$kwarg), $shape, 'float32');
     }
 }
 
@@ -335,25 +396,40 @@ sub test_sgd
     my $opt1 = 'PerlSGD';
     my $opt2 = mx->optimizer->SGD;
     my $shape = [3, 4, 5];
-    my @kwargs = (
-                    {},
-                    {momentum => 0.9},
-                    {clip_gradient => 0.5},
-                    {clip_gradient => 0.4, rescale_grad => 0.14},
-                    {rescale_grad  => 0.8},
-                    {clip_gradient => 0.5, wd => 0.07},
-                    {clip_gradient => 0.4, rescale_grad => 0.14, wd => 0.03},
-                    {rescale_grad  => 0.8, wd => 0.05},
-                    {clip_gradient => 0.5, momentum => 0.9},
-                    {clip_gradient => 0.4, rescale_grad => 0.14, momentum => 0.9},
-                    {rescale_grad  => 0.8, momentum => 0.9},
-                    {clip_gradient => 0.5, wd => 0.07, momentum => 0.9},
-                    {clip_gradient => 0.4, rescale_grad => 0.14, wd => 0.03, momentum => 0.9},
-                    {rescale_grad  => 0.8, wd => 0.05, momentum => 0.9}
-    );
-    for my $kwarg (@kwargs)
+    my @mom_options = ({}, {momentum => 0.9});
+    my @cg_options = ({}, {clip_gradient => 0.4}, {clip_gradient => 0.5});
+    my @rg_options = ({}, {rescale_grad => 0.14}, {rescale_grad => 0.8});
+    my @wd_options = ({}, {wd => 0.03}, {wd => 0.05}, {wd => 0.07});
+    my @mp_options = ({}, {multi_precision => 0}, {multi_precision => 1});
+    for my $dtype(qw/float16 float32 float64/)
     {
-        compare_optimizer($opt1->new(%$kwarg), $opt2->new(%$kwarg), $shape);
+        for my $mom_option (@mom_options)
+        {
+            for my $cg_option (@cg_options)
+            {
+                for my $rg_option (@rg_options)
+                {
+                    for my $wd_option (@wd_options)
+                    {
+                        for my $mp_option (@mp_options)
+                        {
+                            my %kwarg;
+                            %kwarg = (%kwarg, %$mom_option);
+                            %kwarg = (%kwarg, %$cg_option);
+                            %kwarg = (%kwarg, %$rg_option);
+                            %kwarg = (%kwarg, %$wd_option);
+                            %kwarg = (%kwarg, %$mp_option);
+                            next if (
+                                $dtype eq 'float16'
+                                    and
+                                (not exists $kwarg{multi_precision} or not $kwarg{multi_precision})
+                            );
+                            compare_optimizer($opt1->new(%kwarg), $opt2->new(%kwarg), $shape, $dtype);
+                        }
+                    }
+                }
+            }
+        }
     }
 }
 
@@ -392,4 +468,3 @@ test_adam();
 test_rms();
 test_sgd();
 test_lr_wd_mult();
-
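
The PerlSGD reference implementation above mirrors what the reworked SGD
optimizer's multi_precision mode does: for float16 weights it keeps a float32
master copy (plus float32 momentum), performs the update in float32, and
casts the result back into the float16 weight. A minimal sketch of driving it
by hand, following the create_state/update calling convention used in the
test (hyperparameters are illustrative):

    use AI::MXNet qw(mx);

    my $sgd = mx->optimizer->SGD;    # class shortcut, as in test_sgd()
    my $opt = $sgd->new(
        learning_rate   => 0.01,
        momentum        => 0.9,
        multi_precision => 1,        # float32 master weights for fp16 params
    );
    my $weight = mx->nd->ones([3, 4, 5], dtype => 'float16');
    my $grad   = mx->nd->ones([3, 4, 5], dtype => 'float16') * 0.1;
    my $state  = $opt->create_state(0, $weight);  # [momentum, master copy]
    $opt->update(0, $weight, $grad, $state);      # $weight stays float16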

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/Changes
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/Changes b/perl-package/AI-MXNetCAPI/Changes
index df98bd9..17595b4 100644
--- a/perl-package/AI-MXNetCAPI/Changes
+++ b/perl-package/AI-MXNetCAPI/Changes
@@ -1,5 +1,8 @@
 Revision history for Perl extension AI::MXNetCAPI
 
+1.0101  Sun Jul  2 17:16:01 PDT 2017
+        - refactored CachedOp, using strings to index the kvstore.
+
 1.01    Sat Jun 10 23:57:27 PDT 2017
         - sync with python.
 

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/META.json
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/META.json b/perl-package/AI-MXNetCAPI/META.json
index 579c81c..a79b1e0 100644
--- a/perl-package/AI-MXNetCAPI/META.json
+++ b/perl-package/AI-MXNetCAPI/META.json
@@ -37,5 +37,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "1.01"
+   "version" : "1.0101"
 }

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/META.yml
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/META.yml b/perl-package/AI-MXNetCAPI/META.yml
index a36f94c..84b7801 100644
--- a/perl-package/AI-MXNetCAPI/META.yml
+++ b/perl-package/AI-MXNetCAPI/META.yml
@@ -19,4 +19,4 @@ no_index:
     - inc
 requires:
   Test::More: '0'
-version: '1.01'
+version: '1.0101'

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/README
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/README b/perl-package/AI-MXNetCAPI/README
index 3633756..07df0c3 100644
--- a/perl-package/AI-MXNetCAPI/README
+++ b/perl-package/AI-MXNetCAPI/README
@@ -1,4 +1,4 @@
-AI-MXNetCAPI version 1.01
+AI-MXNetCAPI version 1.0101
 =====================
 
 Swig interface to MXNet c api.

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
index 938146a..48ebe80 100644
--- a/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
+++ b/perl-package/AI-MXNetCAPI/lib/AI/MXNetCAPI.pm
@@ -1,7 +1,7 @@
 package AI::MXNetCAPI;
 use base qw(DynaLoader);
 bootstrap AI::MXNetCAPI;
-our $VERSION = '1.01';
+our $VERSION = '1.0101';
 1;
 __END__
 

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/mxnet.i
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i
index d0705d5..bf00e68 100644
--- a/perl-package/AI-MXNetCAPI/mxnet.i
+++ b/perl-package/AI-MXNetCAPI/mxnet.i
@@ -104,7 +104,7 @@ static void ExecutorMonitor_callback(const char* name, NDArrayHandle handle, voi
     }
 }
 
-%} 
+%}
 
 %init %{
     /* These SWIG_TypeClientData() calls might break in the future, but
@@ -119,6 +119,7 @@ static void ExecutorMonitor_callback(const char* name, NDArrayHandle handle, voi
     SWIG_TypeClientData(SWIGTYPE_p_MXKVStore, (void *)"KVStoreHandle");
     SWIG_TypeClientData(SWIGTYPE_p_MXRecordIO, (void *)"RecordIOHandle");
     SWIG_TypeClientData(SWIGTYPE_p_MXRtc, (void *)"RtcHandle");
+    SWIG_TypeClientData(SWIGTYPE_p_MXCachedOp, (void *)"CachedOpHandle");
 %}
 
 /*! \brief manually define unsigned int */
@@ -150,6 +151,8 @@ typedef MXKVStore *KVStoreHandle;
 typedef MXRecordIO *RecordIOHandle;
 /*! \brief handle to MXRtc*/
 typedef MXRtc *RtcHandle;
+/*! \brief handle to cached operator */
+typedef MXCachedOp *CachedOpHandle;
 
 typedef void (*ExecutorMonitorCallback)(const char*,
                                                        NDArrayHandle,
@@ -625,6 +628,23 @@ int MXAutogradBackward(mx_uint num_output,
                                  NDArrayHandle* in,
                                  int retain_graph);
 
+/*!
+ * \brief create a cached operator
+ */
+int MXCreateCachedOp(SymbolHandle handle,
+                                CachedOpHandle *out);
+/*!
+ * \brief free a cached operator
+ */
+int MXFreeCachedOp(CachedOpHandle handle);
+/*!
+ * \brief invoke a cached operator
+ */
+int MXInvokeCachedOp(CachedOpHandle handle,
+                               int num_inputs,
+                               NDArrayHandle *in,
+                               int *out_size,
+                               NDArrayHandle **out_array);
 //--------------------------------------------
 // Part 3: symbolic configuration generation
 //--------------------------------------------
@@ -1331,21 +1351,21 @@ int MXKVStoreCreate(const char *type,
  * \return 0 when success, -1 when failure happens
  */
 int MXKVStoreFree(KVStoreHandle handle);
+
 /*!
- * \brief Init a list of (key,value) pairs in kvstore
+ * \brief Init a list of (key,value) pairs in kvstore, where each key is a string
  * \param handle handle to the kvstore
  * \param num the number of key-value pairs
  * \param keys the list of keys
  * \param vals the list of values
  * \return 0 when success, -1 when failure happens
  */
-int MXKVStoreInit(KVStoreHandle handle,
-                            mx_uint num,
-                            const int* in,
-                            NDArrayHandle* in);
-
-/*!
- * \brief Push a list of (key,value) pairs to kvstore
+int MXKVStoreInitEx(KVStoreHandle handle,
+                              mx_uint num,
+                              const char** in,
+                              NDArrayHandle* in);
+/*!
+ * \brief Push a list of (key,value) pairs to kvstore, where each key is a string
  * \param handle handle to the kvstore
  * \param num the number of key-value pairs
  * \param keys the list of keys
@@ -1353,13 +1373,13 @@ int MXKVStoreInit(KVStoreHandle handle,
  * \param priority the priority of the action
  * \return 0 when success, -1 when failure happens
  */
-int MXKVStorePush(KVStoreHandle handle,
-                            mx_uint num,
-                            const int* in,
-                            NDArrayHandle* in,
-                            int priority);
-/*!
- * \brief pull a list of (key, value) pairs from the kvstore
+int MXKVStorePushEx(KVStoreHandle handle,
+                              mx_uint num,
+                              const char** in,
+                              NDArrayHandle* in,
+                              int priority);
+/*!
+ * \brief Pull a list of (key, value) pairs from the kvstore, where each key is a string
  * \param handle handle to the kvstore
  * \param num the number of key-value pairs
  * \param keys the list of keys
@@ -1367,11 +1387,11 @@ int MXKVStorePush(KVStoreHandle handle,
  * \param priority the priority of the action
  * \return 0 when success, -1 when failure happens
  */
-int MXKVStorePull(KVStoreHandle handle,
-                            mx_uint num,
-                            const int* in,
-                            NDArrayHandle* in,
-                            int priority);
+int MXKVStorePullEx(KVStoreHandle handle,
+                              mx_uint num,
+                              const char** in,
+                              NDArrayHandle* in,
+                              int priority);
 /*!
  * \brief user-defined updater for the kvstore
  * It's this updater's responsibility to delete \a recv and \a local
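
The MXKVStoreInitEx/PushEx/PullEx declarations above replace the
integer-keyed C API, which is what lets the kvstore be indexed by strings. A
minimal sketch of the high-level Perl side, assuming it mirrors the Python
kvstore API (the key name and shapes are illustrative):

    use AI::MXNet qw(mx);

    my $kv = mx->kv->create('local');
    $kv->init('weight', mx->nd->ones([2, 3]));      # string key, not an int
    $kv->push('weight', mx->nd->ones([2, 3]) * 8);
    my $out = mx->nd->zeros([2, 3]);
    $kv->pull('weight', out => $out);               # all 8s with the default updater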

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/c1590296/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
----------------------------------------------------------------------
diff --git a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
index 792f847..640215f 100644
--- a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
+++ b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
@@ -304,6 +304,7 @@
 %typemap(freearg) (mx_float *in) {
     Safefree($1);
 }
+
 %typemap(in,numinputs=0) (NDArrayHandle *out) (NDArrayHandle temp),
                          (FunctionHandle* out) (FunctionHandle temp), 
                          (SymbolHandle *out) (SymbolHandle temp),
@@ -311,12 +312,13 @@
                          (DataIterHandle *out) (ExecutorHandle temp),
                          (KVStoreHandle *out) (KVStoreHandle temp),
                          (RecordIOHandle *out) (RecordIOHandle temp),
-                         (RtcHandle *out) (RtcHandle temp)
+                         (RtcHandle *out) (RtcHandle temp),
+                         (CachedOpHandle *out) (CachedOpHandle temp)
 {
     $1 = &temp;
 }
-%typemap(argout) (NDArrayHandle *out), (FunctionHandle* out), (SymbolHandle *out), (ExecutorHandle *out), (DataIterHandle *out), 
-                 (KVStoreHandle *out), (RecordIOHandle *out), (RtcHandle *out) (RtcHandle temp)
+%typemap(argout) (NDArrayHandle *out), (FunctionHandle* out), (SymbolHandle *out), (ExecutorHandle *out), (DataIterHandle *out),
+                 (KVStoreHandle *out), (RecordIOHandle *out), (RtcHandle *out) (RtcHandle temp), (CachedOpHandle *out) (CachedOpHandle temp)
 {
     if(!result)
     {