You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2019/01/16 20:16:51 UTC
[incubator-mxnet] branch master updated: fix bug in nag optimizer
(#13683)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 9314689 fix bug in nag optimizer (#13683)
9314689 is described below
commit 93146890e7984f0375d87ef417f7cad751488cca
Author: solin319 <li...@126.com>
AuthorDate: Thu Jan 17 04:16:21 2019 +0800
fix bug in nag optimizer (#13683)
* fix bug in nag optimizer
```
grad += wd * weight
mom[:] += grad
grad[:] += self.momentum * mom
weight[:] += -lr * grad
```
This subtracts `wd * weight` twice, whereas in the reference update rule `state = momentum * state + grad + wd * weight`; `weight = weight - lr * (grad + momentum * state)` it is subtracted only once.
* fix bug in nag test
fix bug in nag test
* rewrite nag test
* rewrite nag
* fix nag with in-place operations
* fix nag with in-place operations
---
python/mxnet/optimizer/optimizer.py | 4 ++--
tests/python/unittest/test_optimizer.py | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index d290a3f..6ffbbcf 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -978,10 +978,10 @@ class NAG(Optimizer):
if state is not None:
mom = state
mom[:] *= self.momentum
- grad += wd * weight
mom[:] += grad
+ mom[:] += wd * weight
grad[:] += self.momentum * mom
- weight[:] += -lr * grad
+ weight[:] -= lr * grad
else:
assert self.momentum == 0.0
weight[:] += -lr * (grad + wd * weight)
diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py
index 935bd9a..3fdd1cd 100644
--- a/tests/python/unittest/test_optimizer.py
+++ b/tests/python/unittest/test_optimizer.py
@@ -385,10 +385,10 @@ class PyNAG(PySGD):
else:
mom = state
mom[:] *= self.momentum
- grad += wd * weight
mom[:] += grad
+ mom[:] += wd * weight
grad[:] += self.momentum * mom
- weight[:] += -lr * grad
+ weight[:] -= lr * grad
else:
grad32 = array(grad, ctx=grad.context, dtype=np.float32)
grad32 = grad32 * self.rescale_grad
@@ -400,10 +400,10 @@ class PyNAG(PySGD):
weight32[:] += -lr * (grad32 + wd * weight32)
else:
mom[:] *= self.momentum
- grad32 += wd * weight32
mom[:] += grad32
+ mom[:] += wd * weight32
grad32[:] += self.momentum * mom
- weight32[:] += -lr * grad32
+ weight32[:] -= lr * grad32
tmp = weight32.astype(weight.dtype)
tmp.copyto(weight)