You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by th...@apache.org on 2019/01/24 17:54:54 UTC
[incubator-mxnet] branch master updated: Gradient multiplier
(contrib) operator (#13632)
This is an automated email from the ASF dual-hosted git repository.
thomasdelteil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 183be8c Gradient multiplier (contrib) operator (#13632)
183be8c is described below
commit 183be8cfb91582dfa2891555536b3c01ffebe169
Author: Istvan Fehervari <go...@gmail.com>
AuthorDate: Thu Jan 24 09:54:25 2019 -0800
Gradient multiplier (contrib) operator (#13632)
* Added the gradient reversal contrib operator
Missing test for backwards pass
* Fixed linting errors
* Fixed forward test
* Added random forward / backward test for gradient reversal
* Update test_contrib_operator.py
* Fixed typo in gradient reversal op description
* Replace forward code with the identity implementation
* Fixed typos in function docs
* Changed default behavior to identity
* Replaced backward code with scalar_mul
* Fixed backward operator and unit test
* Renamed operator to gradient multiplier
* Update test_contrib_operator.py
Retrigger flaky test
* Update gradient_multiplier_op.cc
Improved the description of the scalar multiplier
---
include/mxnet/op_attr_types.h | 6 +-
src/operator/contrib/gradient_multiplier_op.cc | 99 ++++++++++++++++++++++++++
src/operator/contrib/gradient_multiplier_op.cu | 41 +++++++++++
tests/python/unittest/test_contrib_operator.py | 36 ++++++++++
4 files changed, 179 insertions(+), 3 deletions(-)
diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h
index dd81845..41be554 100644
--- a/include/mxnet/op_attr_types.h
+++ b/include/mxnet/op_attr_types.h
@@ -254,7 +254,7 @@ using FNDArrayFunction = std::function<void (const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& inputs,
std::vector<NDArray>* outputs)>;
/*!
- * \brief Resiger a compute function for simple stateless forward only operator
+ * \brief Register a compute function for simple stateless forward only operator
*
* \note Register under "FCompute<cpu>" and "FCompute<gpu>"
*/
@@ -264,7 +264,7 @@ using FCompute = std::function<void (const nnvm::NodeAttrs& attrs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs)>;
/*!
- * \brief Resiger an NDArray compute function for simple stateless forward only operator
+ * \brief Register an NDArray compute function for simple stateless forward only operator
* \note Register under "FComputeEx<xpu>" and "FComputeEx<xpu>"
* Dispatched only when inferred dispatch_mode is FDispatchComputeEx
*/
@@ -275,7 +275,7 @@ using FComputeEx = std::function<void (const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& outputs)>;
/*!
- * \brief Resiger a storage and dispatch mode inference function based on
+ * \brief Register a storage and dispatch mode inference function based on
* storage types of the inputs and outputs, and the dev_mask for the operator.
*
* \note Register under "FInferStorageType"
diff --git a/src/operator/contrib/gradient_multiplier_op.cc b/src/operator/contrib/gradient_multiplier_op.cc
new file mode 100644
index 0000000..47f891e
--- /dev/null
+++ b/src/operator/contrib/gradient_multiplier_op.cc
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file gradient_multiplier_op.cc
+ * \brief CPU registration of the gradient multiplier operator (identity forward, scaled gradient backward)
+ * \author Istvan Fehervari
+*/
+#include "../tensor/elemwise_unary_op.h"
+#include "../tensor/elemwise_binary_scalar_op.h"
+
+namespace mxnet {
+namespace op {
+
+/*!
+ * \brief Storage type and dispatch mode inference for an op with exactly one
+ * input and one output (both sizes are enforced with CHECK_EQ).
+ *
+ * The input storage type selects the branch:
+ * - default (dense) input: try to assign default output storage with FCompute;
+ * - row sparse input: try to assign row sparse output storage with FComputeEx;
+ * - csr input: try to assign csr output storage with FComputeEx;
+ * - anything not dispatched by the above goes to dispatch_fallback.
+ * For the two sparse branches, when running on cpu and the output storage type
+ * is default, FComputeEx is selected as well.
+ *
+ * \param attrs node attributes (not read by this function)
+ * \param dev_mask device mask; only compared against cpu::kDevMask
+ * \param dispatch_mode in/out dispatch mode to be decided
+ * \param in_attrs storage types of the inputs (size 1)
+ * \param out_attrs storage types of the outputs (size 1), may be updated
+ * \return the "dispatched" flag produced by the selected branch or by the fallback
+ */
+static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs,
+ const int dev_mask,
+ DispatchMode* dispatch_mode,
+ std::vector<int> *in_attrs,
+ std::vector<int> *out_attrs) {
+ CHECK_EQ(in_attrs->size(), 1);
+ CHECK_EQ(out_attrs->size(), 1);
+ const auto in_stype = in_attrs->at(0);
+ // reference: storage_type_assign below writes through it, and the cpu checks re-read it
+ auto &out_stype = out_attrs->at(0);
+ bool dispatched = false;
+ if (!dispatched && (in_stype == kDefaultStorage)) {
+ // dense -> dense
+ dispatched = storage_type_assign(&out_stype, kDefaultStorage,
+ dispatch_mode, DispatchMode::kFCompute);
+ }
+ if (!dispatched && in_stype == kRowSparseStorage) {
+ // row sparse -> row sparse
+ dispatched = storage_type_assign(&out_stype, kRowSparseStorage,
+ dispatch_mode, DispatchMode::kFComputeEx);
+ // FComputeEx can handle dense (dns) output on cpu, too
+ if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) {
+ DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx);
+ dispatched = true;
+ }
+ }
+ if (!dispatched && in_stype == kCSRStorage) {
+ // csr -> csr
+ dispatched = storage_type_assign(&out_stype, kCSRStorage,
+ dispatch_mode, DispatchMode::kFComputeEx);
+ // FComputeEx can handle dense (dns) output on cpu, too
+ if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) {
+ DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx);
+ dispatched = true;
+ }
+ }
+ // no branch above claimed the op: let dispatch_fallback decide
+ if (!dispatched) {
+ dispatched = dispatch_fallback(out_attrs, dispatch_mode);
+ }
+ return dispatched;
+}
+
+// Forward operator: registered as a unary op whose cpu compute functions are the
+// identity kernels, and whose gradient is delegated to
+// _contrib_backward_gradientmultiplier.
+MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientmultiplier)
+.describe(R"code(This operator implements the gradient multiplier function.
+In forward pass it acts as an identity transform. During backpropagation it
+multiplies the gradient from the subsequent level by a scalar factor lambda and passes it to
+the preceding layer.
+)code" ADD_FILELINE)
+// The "scalar" attribute is parsed with std::stod and stored as a double in attrs->parsed.
+// NOTE(review): no handling of a missing or non-numeric "scalar" is visible here -
+// confirm whether a default is supplied elsewhere.
+.set_attr_parser([](NodeAttrs* attrs) {
+ attrs->parsed = std::stod(attrs->dict["scalar"]);
+ })
+.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>)
+.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", UnaryOp::IdentityComputeEx<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientmultiplier"})
+// reports {true} for the single input/output pair, matching the identity forward pass
+.set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
+ [](const NodeAttrs& attrs){
+ return std::vector<bool>{true};
+ })
+.add_argument("scalar", "float", "lambda multiplier");
+
+// Backward operator: registered as a binary-scalar op flagged TIsBackward, using the
+// element-wise `mul` kernel on cpu. It is the op named by the FGradient attribute of
+// _contrib_gradientmultiplier.
+// NOTE(review): presumably the scalar operand is the forward op's "scalar" attribute -
+// confirm how ElemwiseGradUseNone forwards node attributes.
+MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientmultiplier)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FInferStorageType>("FInferStorageType", BinaryScalarStorageType)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::mul>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", BinaryScalarOp::ComputeEx<cpu, op::mshadow_op::mul>);
+
+} // namespace op
+} // namespace mxnet
diff --git a/src/operator/contrib/gradient_multiplier_op.cu b/src/operator/contrib/gradient_multiplier_op.cu
new file mode 100644
index 0000000..7159cea
--- /dev/null
+++ b/src/operator/contrib/gradient_multiplier_op.cu
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file gradient_multiplier_op.cu
+ * \brief GPU compute registration for the gradient multiplier operator and its backward op
+ * \author Istvan Fehervari
+*/
+#include "../tensor/elemwise_unary_op.h"
+#include "../tensor/elemwise_binary_scalar_op.h"
+
+namespace mxnet {
+namespace op {
+
+// Attach the gpu identity kernels (dense and NDArray-based) to the forward op.
+NNVM_REGISTER_OP(_contrib_gradientmultiplier)
+.set_attr<FComputeEx>("FComputeEx<gpu>", UnaryOp::IdentityComputeEx<gpu>)
+.set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>);
+
+// Attach the gpu scalar-`mul` kernels (dense and NDArray-based) to the backward op.
+NNVM_REGISTER_OP(_contrib_backward_gradientmultiplier)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarOp::Compute<gpu, op::mshadow_op::mul>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", BinaryScalarOp::ComputeEx<gpu, op::mshadow_op::mul>);
+
+} // namespace op
+} // namespace mxnet
diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py
index 43d3db6..aac8076 100644
--- a/tests/python/unittest/test_contrib_operator.py
+++ b/tests/python/unittest/test_contrib_operator.py
@@ -261,6 +261,42 @@ def test_multibox_target_op():
assert_array_equal(loc_mask.asnumpy(), expected_loc_mask)
assert_array_equal(cls_target.asnumpy(), expected_cls_target)
+def test_gradient_multiplier_op():
+ # We use the quadratic function in combination with gradient multiplier
+ def f(x, a, b, c):
+ return a * x**2 + b * x + c
+
+ a = np.random.random_sample()
+ b = np.random.random_sample()
+ c = np.random.random_sample()
+ m = np.random.random_sample() - 0.5
+
+ data = mx.symbol.Variable('data')
+ quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c)
+ gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=m)
+
+ for dtype in [np.float16, np.float32, np.float64]:
+ for ndim in range(1, 6):
+ shape = rand_shape_nd(ndim, 5)
+ data_np = np.random.randn(*shape).astype(dtype)
+ expected = f(data_np, a, b, c)
+ backward_expected = (2 * a * data_np + b) * m
+
+ # check imperative forward
+ output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c)
+ output = mx.nd.contrib.gradientmultiplier(output, scalar=m)
+ assert_almost_equal(output.asnumpy(), expected,
+ rtol=1e-2 if dtype is np.float16 else 1e-5,
+ atol=1e-2 if dtype is np.float16 else 1e-5)
+ # check forward
+ check_symbolic_forward(gr_q_sym, [data_np], [expected],
+ rtol=1e-2 if dtype is np.float16 else 1e-5,
+ atol=1e-2 if dtype is np.float16 else 1e-5)
+ # check backward
+ check_symbolic_backward(gr_q_sym, [data_np], [np.ones(expected.shape)],
+ [backward_expected],
+ rtol=1e-2 if dtype is np.float16 else 1e-5,
+ atol=1e-2 if dtype is np.float16 else 1e-5)
if __name__ == '__main__':
import nose