Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/04/26 20:53:58 UTC
[incubator-mxnet] branch master updated: [Bugfix] Fix take gradient (#20203)
This is an automated email from the ASF dual-hosted git repository.
zha0q1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 57b58b0 [Bugfix] Fix take gradient (#20203)
57b58b0 is described below
commit 57b58b0865d730e49f3b4c3387abf00d9ff38e27
Author: waytrue17 <52...@users.noreply.github.com>
AuthorDate: Mon Apr 26 13:52:16 2021 -0700
[Bugfix] Fix take gradient (#20203)
* fix take
* remove inputs in hybridblock
* remove name scope
* adjust data size
* test with dense
Co-authored-by: Wei Chu <we...@amazon.com>
---
src/operator/tensor/indexing_op.h      |  6 +--
tests/python/unittest/test_operator.py | 82 ++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+), 3 deletions(-)
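
For context, https://github.com/apache/incubator-mxnet/issues/19817 reported
that take() produced incorrect input gradients when taking along a non-zero
axis. A minimal sketch of the failure mode (illustrative shapes and index,
not taken from the commit):

import mxnet as mx
from mxnet import autograd, nd

x = nd.random.normal(shape=(3, 4))
x.attach_grad()
with autograd.record():
    y = nd.take(x, nd.array([1]), axis=1)  # take along axis 1, not axis 0
y.backward()
# Expected gradient: ones in column 1, zeros elsewhere. Before this fix the
# zero entries could contain stale values when the gradient request was
# kWriteTo, because the gradient buffer was never cleared on that code path.
print(x.grad)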
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index c94ac43..e0b1deb 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1031,12 +1031,12 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
     Tensor<xpu, 2, DType> grad_in = outputs[0].get_with_shape<xpu, 2, DType>(
         Shape2(arrshape[0], arrshape.ProdShape(1, arrshape.ndim())), s);
 
+    if (req[take_::kArr] == kWriteTo) {
+      grad_in = scalar<DType>(0.0f);
+    }
     // re-using the previous code for axis = 0 case
     if (actual_axis == 0) {
       if (req[take_::kArr] == kWriteTo || req[take_::kArr] == kAddTo) {
-        if (req[take_::kArr] == kWriteTo) {
-          grad_in = scalar<DType>(0.0f);
-        }
         if (safe_acc) {
           // Temporary storage for safe accumulation
           size_t temp_space_size = grad_in.size(0) * grad_in.size(1) * sizeof(AType);
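
The hunk above hoists the kWriteTo zero-initialization of grad_in out of the
actual_axis == 0 branch, so the paths handling a non-zero axis also start
from a cleared gradient buffer. A rough Python sketch of why the clearing
matters (names and the axis-0 restriction are illustrative, not the C++
kernel):

import numpy as np

def take_backward_axis0(grad_out, idx, in_rows, req="write", grad_in=None):
    # take's backward is a scatter-add: repeated indices must accumulate,
    # so the kernel adds with +=. Under a write-to request the buffer must
    # therefore be zeroed first; under add-to, existing gradients are kept.
    if req == "write":
        grad_in = np.zeros((in_rows,) + grad_out.shape[1:])  # the hoisted zeroing
    for out_row, i in enumerate(idx):
        grad_in[i] += grad_out[out_row]
    return grad_in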
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 88bd4c3..0748757 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9453,3 +9453,85 @@ def test_zero_sized_dim():
     seq_last()
     seq_reverse()
     seq_mask()
+
+def test_take_grads():
+    # Test for https://github.com/apache/incubator-mxnet/issues/19817
+    from mxnet.gluon.nn import HybridBlock, Conv1D, HybridSequential, HybridLambda, Dense
+    from mxnet import autograd, nd
+    from mxnet.gluon.loss import L2Loss
+
+    def get_grads(model, grads, ctx=mx.cpu()):
+        pd = model.collect_params()
+        total_grad_l2 = 0
+        total_grad_l1 = 0
+        total_grad_linf = 0
+        for p in pd:
+            try:
+                g = pd[p].grad(ctx) / N
+                g2 = (g**2).sum().as_in_context(mx.cpu()).asscalar()
+                g1 = g.abs().sum().as_in_context(mx.cpu()).asscalar()
+                ginf = g.max().as_in_context(mx.cpu()).asscalar()
+                total_grad_linf = max(total_grad_linf, ginf)
+                total_grad_l2 += g2
+                total_grad_l1 += g1
+            except Exception:
+                pass
+
+        grads.append(total_grad_l1)
+        grads.append(total_grad_l2)
+        grads.append(total_grad_linf)
+
+    def run_model(model, loss, X, Y, num_iters=5):
+        grads = []
+        for i in range(num_iters):
+            with autograd.record():
+                Y_hat = model(X)
+                ll = loss(Y_hat, Y)
+                ll = ll.sum()
+            ll.backward()
+            get_grads(model, grads)
+        return grads
+
+    def dense_layer():
+        den = HybridSequential()
+        den.add(Dense(10, flatten=True, activation='tanh'))
+        return den
+
+    class Model(HybridBlock):
+        def __init__(self, use_take=False, **kwargs):
+            super().__init__()
+            self.use_take = use_take
+            self.den = dense_layer()
+
+        def hybrid_forward(self, F, X, axis=1):
+            X1 = self.den(X)
+            if self.use_take:
+                X2 = F.take(X1, nd.array([0]), axis=axis)
+            else:
+                X2 = F.slice_axis(X1, begin=0, end=1, axis=axis)
+            return X2
+
+    N = 30
+    T = 20
+    C = 10
+
+    X = np.random.normal(size=(N, T, C))
+    Y = np.random.normal(size=(N, 1))
+    X, Y = nd.array(X), nd.array(Y)
+    seed = np.random.randint(1000)
+
+    # Using F.take
+    mx.random.seed(seed)
+    model = Model(use_take=True)
+    model.initialize()
+    loss = L2Loss()
+    grads1 = run_model(model, loss, X, Y)
+
+    # Using F.slice_axis
+    mx.random.seed(seed)
+    model2 = Model(use_take=False)
+    model2.initialize()
+    grads2 = run_model(model2, loss, X, Y)
+
+    for i in range(len(grads1)):
+        assert_almost_equal(grads1[i], grads2[i])
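
The new test drives the same model through F.take and F.slice_axis and
asserts that the accumulated gradient norms match. The same property can be
checked directly on the operators (a standalone sketch, not part of the
commit):

import mxnet as mx
from mxnet import autograd, nd

x = nd.random.normal(shape=(4, 5))

x.attach_grad()
with autograd.record():
    y = nd.take(x, nd.array([0]), axis=1)
y.backward()
g_take = x.grad.copy()

x.attach_grad()  # re-attach to get a fresh gradient buffer
with autograd.record():
    y = nd.slice_axis(x, axis=1, begin=0, end=1)
y.backward()
assert (g_take == x.grad).asnumpy().all()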