You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ap...@apache.org on 2019/06/10 18:14:08 UTC
[incubator-mxnet] branch master updated: [MXNET-978] Second order
gradient support for some unary operators (#14613)
This is an automated email from the ASF dual-hosted git repository.
apeforest pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 3c82ce2 [MXNET-978] Second order gradient support for some unary operators (#14613)
3c82ce2 is described below
commit 3c82ce2405d806c9a6e3de4d3abc9c0446f87b65
Author: Lin Yuan <ap...@gmail.com>
AuthorDate: Mon Jun 10 11:13:22 2019 -0700
[MXNET-978] Second order gradient support for some unary operators (#14613)
* try to add support some ops
* add unit test for second order grad
* implement grad for relu and add unit test
* fix lint
* register FGradient attribute for backward relu
* resolve conflict
* remove unused imports
* change gradient using set_attr
* remove higher order grad test for negative(x)
* fix lint
* reverse indent
* remove unused backward operator
* refactor backward for sin(x) and cos(x)
* change value init to list init
* change to list initialization
* generate random shape in test
* fix a bug in second order backward
* fix lint
* fix lint
* address reviewer comment and renaming
---
src/operator/tensor/elemwise_unary_op_basic.cc | 22 ++++++++-
src/operator/tensor/elemwise_unary_op_trig.cc | 60 ++++++++++++++++++++++-
tests/python/unittest/test_higher_order_grad.py | 64 ++++++++++++++++++++-----
3 files changed, 131 insertions(+), 15 deletions(-)
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index ee77817..f2b8dd6 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -85,8 +85,26 @@ The storage type of ``relu`` output depends upon the input storage type:
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_relu"});
-MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu,
- unary_bwd<mshadow_op::relu_grad>);
+MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd<mshadow_op::relu_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ std::vector<nnvm::NodeEntry> ret;
+ // ograds[0]: dL/dx_grad (head gradient w.r.t. the output of _backward_relu)
+ // inputs[0]: dL/dy
+ // inputs[1]: y
+ // f(x) -> relu(x)
+ // f'(x) = 1 if x > 0 else 0
+ // f''(x) = 0
+ auto dydx = MakeNode("_greater", n->attrs.name + "_dydx",
+ {n->inputs[1], nnvm::NodeEntry{
+ MakeNode("zeros_like", n->attrs.name + "tmp", {n->inputs[1]}, nullptr, &n)
+ }}, nullptr, &n);
+ ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
+ {ograds[0], nnvm::NodeEntry(dydx)}, nullptr, &n));
+ ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in",
+ {n->inputs[1]}, nullptr, &n));
+ return ret;
+ });
// sigmoid
MXNET_OPERATOR_REGISTER_UNARY(sigmoid)
diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index 28a11cc..b7cf76e 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -46,7 +46,33 @@ The storage type of ``sin`` output depends upon the input storage type:
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_sin" });
-MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd<mshadow_op::sin_grad>);
+MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd<mshadow_op::sin_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ // ograds[0]: dL/dx_grad (head gradient w.r.t. the output of _backward_sin)
+ // inputs[0]: dL/dy
+ // inputs[1]: x (available because sin registers ElemwiseGradUseIn)
+ // f(x) = sin(x)
+ // f'(x) = cos(x)
+ // f''(x) = -sin(x)
+ auto dydx = MakeNode("cos", n->attrs.name + "_dydx",
+ {n->inputs[1]}, nullptr, &n);
+ auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2",
+ {nnvm::NodeEntry{
+ MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n)
+ }}, nullptr, &n);
+
+ auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid",
+ {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n);
+
+ std::vector<nnvm::NodeEntry> ret;
+
+ ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
+ {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n));
+ ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
+ {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n));
+ return ret;
+ });
// cos
MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos)
@@ -63,7 +89,37 @@ The storage type of ``cos`` output is always dense
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_cos"});
-MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd<mshadow_op::cos_grad>);
+MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd<mshadow_op::cos_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+ [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+ // ograds[0]: dL/dx_grad (head gradient w.r.t. the output of _backward_cos)
+ // inputs[0]: dL/dy
+ // inputs[1]: x (available because cos registers ElemwiseGradUseIn)
+ // f(x) = cos(x)
+ // f'(x) = -sin(x)
+ // f''(x) = -cos(x)
+ auto dydx = MakeNode("negative", n->attrs.name + "_dydx",
+ {nnvm::NodeEntry{
+ MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n)
+ }}, nullptr, &n);
+ auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2",
+ {nnvm::NodeEntry{
+ MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n)
+ }}, nullptr, &n);
+
+ auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid",
+ {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n);
+
+ std::vector<nnvm::NodeEntry> ret;
+ // for the backward of the _backward_cos node
+ // first input is the ograd and second input is x (because ElemwiseGradUseIn)
+ ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
+ {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n));
+ ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
+ {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n));
+ return ret;
+ });
+
// tan
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan)
diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py
index 92c78d1..77bfa68 100644
--- a/tests/python/unittest/test_higher_order_grad.py
+++ b/tests/python/unittest/test_higher_order_grad.py
@@ -15,14 +15,56 @@
# specific language governing permissions and limitations
# under the License.
-import math
+import math
from mxnet import nd, autograd
-from mxnet.test_utils import assert_almost_equal, random_arrays
+from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd
from common import with_seed
@with_seed()
+def test_sin():
+ def sin(x):
+ return nd.sin(x)
+
+ def grad_grad_op(x):
+ return -nd.sin(x)
+
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
+ check_second_order_unary(array, sin, grad_grad_op)
+
+
+@with_seed()
+def test_cos():
+ def cos(x):
+ return nd.cos(x)
+
+ def grad_grad_op(x):
+ return -nd.cos(x)
+
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
+ check_second_order_unary(array, cos, grad_grad_op)
+
+
+@with_seed()
+def test_relu():
+ def relu(x):
+ return nd.relu(x)
+
+ def grad_grad_op(x):
+ return nd.zeros_like(x)
+
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
+ check_second_order_unary(array, relu, grad_grad_op)
+
+
+@with_seed()
def test_log():
def log(x):
return nd.log(x)
@@ -30,9 +72,9 @@ def test_log():
def grad_grad_op(x):
return -1/(x**2)
- arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))
-
- for array in arrays:
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
check_second_order_unary(array, log, grad_grad_op)
@@ -44,9 +86,9 @@ def test_log2():
def grad_grad_op(x):
return -1/((x**2) * math.log(2))
- arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))
-
- for array in arrays:
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
check_second_order_unary(array, log2, grad_grad_op)
@@ -58,9 +100,9 @@ def test_log10():
def grad_grad_op(x):
return -1/((x**2) * math.log(10))
- arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))
-
- for array in arrays:
+ for dim in range(1, 5):
+ shape = rand_shape_nd(dim)
+ array = random_arrays(shape)
check_second_order_unary(array, log10, grad_grad_op)