Posted to commits@mxnet.apache.org by ap...@apache.org on 2019/05/21 17:44:01 UTC
[incubator-mxnet] branch develop/higher_order_grad updated: Add second order gradient for a list of basic operators (#15024)
This is an automated email from the ASF dual-hosted git repository.
apeforest pushed a commit to branch develop/higher_order_grad
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/develop/higher_order_grad by this push:
new 8c41b03 Add second order gradient for a list of basic operators (#15024)
8c41b03 is described below
commit 8c41b03b6fcb84f1b06edd135da2af008881a8f9
Author: Lin Yuan <ap...@gmail.com>
AuthorDate: Tue May 21 10:43:40 2019 -0700
Add second order gradient for a list of basic operators (#15024)
* try to add support for some ops
* add unit test for second order grad
* implement grad for relu and add unit test
* fix lint
---
src/imperative/imperative.cc | 5 +-
src/operator/tensor/elemwise_binary_op_basic.cc | 12 +++-
src/operator/tensor/elemwise_unary_op_basic.cc | 21 +++++-
src/operator/tensor/elemwise_unary_op_trig.cc | 22 +++++-
tests/python/unittest/test_higher_order_grad.py | 89 +++++++++++++++++++++++++
5 files changed, 142 insertions(+), 7 deletions(-)
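For context before the diffs: each operator change below replaces an opaque
backward kernel with an FGradient lambda built from ordinary graph nodes, which
is what makes the first-order gradient differentiable a second time. A minimal
usage sketch, mirroring the check_second_order_unary helper in the new test
file at the end of this commit (head gradients default to ones):

    import mxnet as mx
    from mxnet import nd, autograd

    x = nd.array([1.0, 2.0, 3.0])
    x.attach_grad()
    with autograd.record():
        y = nd.sin(x)
        # create_graph=True records the backward pass itself, so the
        # first-order gradient dy/dx stays differentiable
        y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
    y_grad.backward()   # differentiate the gradient once more
    print(x.grad)       # d^2 sin(x) / dx^2 = -sin(x)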
diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc
index a1c41ee..215b1d8 100644
--- a/src/imperative/imperative.cc
+++ b/src/imperative/imperative.cc
@@ -349,8 +349,9 @@ std::vector<NDArray*> Imperative::Backward(
x_reqs.push_back(info.grad_req);
info.fresh_out_grad = true;
}
- CHECK_GT(xs.size(), 0)
- << "There are no inputs in computation graph that require gradients.";
+ if (xs.empty()) {
+ LOG(WARNING) << "There are no inputs in computation graph that require gradients.";
+ }
}
Graph g_graph = pass::MXGradient(
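The relaxation above from a fatal CHECK to a warning lets a
gradient-of-gradient pass proceed when a backward graph contains no inputs
that themselves require gradients. A hedged illustration of the case that
previously aborted the process (assuming this Python path reaches
Imperative::Backward):

    from mxnet import nd, autograd

    x = nd.array([1.0, 2.0, 3.0])   # note: no x.attach_grad() here
    with autograd.record():
        y = nd.sin(x)
    y.backward()   # no recorded input requires grad: formerly a fatal
                   # CHECK_GT failure, now only a LOG(WARNING)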
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc
index c5e30c6..2e1f979 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_op_basic.cc
@@ -233,7 +233,17 @@ The storage type of ``elemwise_mul`` output depends on storage types of inputs
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.add_alias("_mul").add_alias("_Mul")
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_mul"});
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ auto lhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_lhs",
+ {ograds[0], n->inputs[1]}, nullptr, &n);
+ auto rhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_rhs",
+ {ograds[0], n->inputs[0]}, nullptr, &n);
+ std::vector<nnvm::NodeEntry> ret;
+ ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0});
+ ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0});
+ return ret;
+ });
NNVM_REGISTER_OP(_backward_mul)
.set_num_inputs(3)
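The replaced ElemwiseGradUseIn{"_backward_mul"} dispatched to a fused backward
kernel with no FGradient of its own, so the backward graph was a dead end for
autograd. The lambda instead spells out the product rule as two elemwise_mul
nodes, d(x*y)/dx = ograd * y and d(x*y)/dy = ograd * x, which are themselves
differentiable. A quick check matching test_elemwise_mul below:

    from mxnet import nd, autograd

    x = nd.array([1.0, 2.0, 3.0])
    x.attach_grad()
    with autograd.record():
        y = nd.elemwise_mul(x, x)   # y = x^2
        dydx = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
    dydx.backward()
    print(x.grad)   # dydx = 2x, so the second derivative is [2. 2. 2.]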
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index 1634606..b87e308 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -83,7 +83,18 @@ The storage type of ``relu`` output depends upon the input storage type:
- relu(csr) = csr
)code" ADD_FILELINE)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_relu"});
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ auto zero_node = MakeNode("zeros_like", n->attrs.name + "_backward",
+ {n->inputs[0]}, nullptr, &n);
+ auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad",
+ {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n);
+ auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward",
+ {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n);
+ std::vector<nnvm::NodeEntry> ret;
+ ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0});
+ return ret;
+ });
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu,
unary_bwd<mshadow_op::relu_grad>);
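Here the relu gradient is rebuilt from primitives: zeros_like and _greater
produce the mask (x > 0), and elemwise_mul applies it to the incoming ograd.
Because the mask is piecewise constant in x, differentiating the gradient
again yields zero almost everywhere, which is what test_relu asserts:

    from mxnet import nd, autograd

    x = nd.array([1.0, 2.0, 3.0])
    x.attach_grad()
    with autograd.record():
        y = nd.relu(x)
        dydx = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
    dydx.backward()
    print(x.grad)   # the mask (x > 0) is constant in x, so [0. 0. 0.]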
@@ -656,7 +667,13 @@ The storage type of ``negative`` output depends upon the input storage type:
- negative(csr) = csr
)code")
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"negative"});
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ auto in_grad = MakeNode("negative", n->attrs.name + "_backward", {ograds[0]}, nullptr, &n);
+ std::vector<nnvm::NodeEntry> ret;
+ ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0});
+ return ret;
+ });
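Worked out, the chain for negative is short: for y = -x, dy/dx = -1, so the
first-order gradient is just negative(ograd) with no dependence on x, and a
second differentiation gives exactly zero. That is the
expect_grad = nd.zeros_like(x) assertion in test_negative below.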
// reciprocal
MXNET_OPERATOR_REGISTER_UNARY(reciprocal)
diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index 28a11cc..3308d5f 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -44,7 +44,15 @@ The storage type of ``sin`` output depends upon the input storage type:
- sin(csr) = csr
)code" ADD_FILELINE)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_sin" });
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ auto x_grad = MakeNode("cos", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n);
+ auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward",
+ {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n);
+ std::vector<nnvm::NodeEntry> ret;
+ ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0});
+ return ret;
+ });
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd<mshadow_op::sin_grad>);
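Written out: the first-order gradient emitted here is
d(sin x)/dx = ograd * cos(x), a cos node feeding an elemwise_mul.
Differentiating the cos node once more gives ograd * (-sin(x)), which is
exactly the expect_grad = -nd.sin(x) asserted by test_sin (the sketch after
the diffstat above shows the check pattern).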
@@ -61,7 +69,17 @@ The input should be in radians (:math:`2\pi` rad equals 360 degrees).
The storage type of ``cos`` output is always dense
)code" ADD_FILELINE)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_cos"});
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ auto x_grad = MakeNode("sin", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n);
+ auto neg_x_grad = MakeNode("negative", n->attrs.name + "_mid_neg_x_grad",
+ {nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n);
+ auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward",
+ {ograds[0], nnvm::NodeEntry{neg_x_grad, 0, 0}}, nullptr, &n);
+ std::vector<nnvm::NodeEntry> ret;
+ ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0});
+ return ret;
+ });
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd<mshadow_op::cos_grad>);
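For cos the extra negative node implements d(cos x)/dx = ograd * (-sin(x));
differentiating once more gives ograd * (-cos(x)), matching
expect_grad = -nd.cos(x) in test_cos.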
diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py
new file mode 100644
index 0000000..4b6bce7
--- /dev/null
+++ b/tests/python/unittest/test_higher_order_grad.py
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+import numpy as np
+from mxnet import gluon, nd, autograd
+from mxnet.test_utils import assert_almost_equal
+from common import with_seed
+
+
+@with_seed()
+def test_elemwise_mul():
+ x = nd.array([1, 2, 3])
+ y = nd.zeros(3)
+ x.attach_grad()
+ with autograd.record():
+ y = nd.elemwise_mul(x, x)
+ y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
+ y_grad.backward()
+ expect_grad = nd.array([2, 2, 2])
+ assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy())
+
+
+@with_seed()
+def test_sin():
+ def sin(x):
+ return nd.sin(x)
+
+ x = nd.array([1, 2, 3])
+ expect_grad = -nd.sin(x)
+ check_second_order_unary(x, sin, expect_grad)
+
+
+@with_seed()
+def test_cos():
+ def cos(x):
+ return nd.cos(x)
+
+ x = nd.array([1, 2, 3])
+ expect_grad = -nd.cos(x)
+ check_second_order_unary(x, cos, expect_grad)
+
+
+@with_seed()
+def test_negative():
+ def negative(x):
+ return nd.negative(x)
+
+ x = nd.array([1, 2, 3])
+ expect_grad = nd.zeros_like(x)
+ check_second_order_unary(x, negative, expect_grad)
+
+
+@with_seed()
+def test_relu():
+ def relu(x):
+ return nd.relu(x)
+
+ x = nd.array([1, 2, 3])
+ expect_grad = nd.zeros_like(x)
+ check_second_order_unary(x, relu, expect_grad)
+
+
+def check_second_order_unary(x, op, expect_grad):
+ x.attach_grad()
+ with autograd.record():
+ y = op(x)
+ y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
+ y_grad.backward()
+ assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy())
+
+
+if __name__ == '__main__':
+ import nose
+ nose.runmodule()