You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@mxnet.apache.org by GitBox <gi...@apache.org> on 2021/02/08 19:30:45 UTC
[GitHub] [incubator-mxnet] szha commented on issue #19817: F.Take Backwards - Incorrect Gradient
szha commented on issue #19817:
URL: https://github.com/apache/incubator-mxnet/issues/19817#issuecomment-775388041
I can confirm that this bug has been fixed on master branch. Here are the outputs from the master branch (after adopting the new Gluon interface)
<details><summary>script</summary>
```python
import numpy as np
import mxnet as mx
from mxnet.gluon.nn import HybridBlock, Conv1D, HybridSequential, HybridLambda, Dense
from mxnet import autograd, nd
from mxnet.gluon.loss import L2Loss
def print_grads(model, ctx=mx.cpu()):
pd = model.collect_params()
total_grad_l2 = 0
total_grad_l1 = 0
total_grad_linf = 0
for p in pd:
try:
g = pd[p].grad(ctx) / N
g2 = (g**2).sum().as_in_context(mx.cpu()).asscalar()
g1 = g.abs().sum().as_in_context(mx.cpu()).asscalar()
ginf = g.max().as_in_context(mx.cpu()).asscalar()
total_grad_linf = max(total_grad_linf, ginf)
total_grad_l2 += g2
total_grad_l1 += g1
print(f"||g_param||_2: {g2**0.5:.2E} | Param: {p}")
except Exception:
pass
grad_info = f"""
-------------------------------------------
------- Grad Info
* ||g||_2: {total_grad_l2**0.5:.2E}
* ||g||_1: {total_grad_l1:.2E}
* ||g||_inf: {total_grad_linf:.2E}
"""
print(grad_info)
def run_model(model, loss, X, Y, num_iters=1):
for i in range(num_iters):
with autograd.record():
Y_hat = model(X)
ll = loss(Y_hat, Y)
ll = ll.sum()
ll.backward()
print_grads(model)
return Y_hat
def conv_layer(atrous_rates, num_channels):
convs = HybridSequential()
convs.add(HybridLambda(lambda F, x: F.transpose(x, (0, 2, 1))))
for rate in atrous_rates:
convs.add(Conv1D(num_channels, 3, padding=rate, dilation=rate, activation='tanh'))
convs.add(HybridLambda(lambda F, x: F.transpose(x, (0, 2, 1))))
return convs
class Model(HybridBlock):
"""
Model takes tensors of shape N x T x C and produces predictions with shape N x T
"""
def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
super().__init__()
self.use_take = use_take
self.convs = conv_layer(atrous_rates, conv_units)
self.dense_out = Dense(1, flatten=False, activation='tanh')
def hybrid_forward(self, F, X):
X1 = X
X2 = self.convs(X1)
if self.use_take:
X3 = F.take(X2, nd.array([1, 2, 3]), axis=-1)
else:
X3 = F.slice_axis(X2, begin=1, end=4, axis=-1)
X4 = self.dense_out(X3)
X4 = F.squeeze(X4, axis=-1)
return X4
if __name__ == "__main__":
N = 30
T = 20
C = 8
conv_units = 5
atrous_rates = [1, 2, 4]
np.random.seed(1234)
X = np.random.normal(size=(N, T, C))
Y = np.random.normal(size=(N, T))
X, Y = nd.array(X), nd.array(Y)
# Using F.take
mx.random.seed(12354)
model = Model(conv_units, atrous_rates, use_take=True)
model.initialize()
loss = L2Loss()
Y_hat1 = run_model(model, loss, X, Y)
# Using F.slice_axis
mx.random.seed(12354)
model2 = Model(conv_units, atrous_rates, use_take=False)
model2.initialize()
loss2 = L2Loss()
Y_hat2 = run_model(model2, loss2, X, Y)
delta = nd.abs(Y_hat1-Y_hat2).sum().asscalar()
print("==== Same outputs?")
print(f"Y_hat1 - Yhat2 = {delta:.4f}")
```
</details>
```
▶ python3 take_bug.py
[14:28:50] ../src/storage/storage.cc:199: Using Pooled (Naive) StorageManager for CPU
||g_param||_2: 2.37E-04 | Param: convs.1.weight
||g_param||_2: 2.29E-05 | Param: convs.1.bias
||g_param||_2: 2.23E-04 | Param: convs.2.weight
||g_param||_2: 1.50E-04 | Param: convs.2.bias
||g_param||_2: 4.26E-04 | Param: convs.3.weight
||g_param||_2: 7.02E-04 | Param: convs.3.bias
||g_param||_2: 1.38E-04 | Param: dense_out.weight
||g_param||_2: 1.06E-02 | Param: dense_out.bias
-------------------------------------------
------- Grad Info
* ||g||_2: 1.06E-02
* ||g||_1: 1.75E-02
* ||g||_inf: 1.06E-02
||g_param||_2: 2.37E-04 | Param: convs.1.weight
||g_param||_2: 2.29E-05 | Param: convs.1.bias
||g_param||_2: 2.23E-04 | Param: convs.2.weight
||g_param||_2: 1.50E-04 | Param: convs.2.bias
||g_param||_2: 4.26E-04 | Param: convs.3.weight
||g_param||_2: 7.02E-04 | Param: convs.3.bias
||g_param||_2: 1.38E-04 | Param: dense_out.weight
||g_param||_2: 1.06E-02 | Param: dense_out.bias
-------------------------------------------
------- Grad Info
* ||g||_2: 1.06E-02
* ||g||_1: 1.75E-02
* ||g||_inf: 1.06E-02
==== Same outputs?
Y_hat1 - Yhat2 = 0.0000
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@mxnet.apache.org
For additional commands, e-mail: issues-help@mxnet.apache.org