Posted to commits@mxnet.apache.org by an...@apache.org on 2018/09/27 07:16:29 UTC
[incubator-mxnet] branch master updated: fix bug in prelu , issue 12061 (#12660)
This is an automated email from the ASF dual-hosted git repository.
anirudh2290 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 40e3d9c fix bug in prelu , issue 12061 (#12660)
40e3d9c is described below
commit 40e3d9c1dde2a803ccd0fa165a09f9c3ded6a121
Author: TianqiTang <22...@qq.com>
AuthorDate: Thu Sep 27 15:15:59 2018 +0800
fix bug in prelu , issue 12061 (#12660)
* fix bug in prelu
* add unit test
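
For context: the shape-inference change below restricts gamma broadcasting to the batch and channel axes, so a gamma of shape (num_channels,) is applied per channel for 4-d NCHW input instead of being right-aligned onto a spatial axis. A minimal NumPy sketch of the intended per-channel behaviour (names and shapes here are illustrative, not taken from the patch):

import numpy as np

def prelu_channelwise(x, gamma):
    # Per-channel PReLU reference: reshape gamma (C,) to (1, C, 1, 1) so it
    # broadcasts along the channel axis of an NCHW tensor.
    g = gamma.reshape((1, -1, 1, 1)) if x.ndim == 4 else gamma
    return np.where(x > 0, x, x * g)

x = np.random.uniform(-1.0, 1.0, size=(3, 4, 4, 5))   # batch=3, channels=4
gamma = np.array([0.1, 0.2, 0.3, 0.4])                 # one slope per channel
y = prelu_channelwise(x, gamma)
assert y.shape == x.shape

The updated unit test below compares the operator against a NumPy reference of this per-channel form.
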
---
 src/operator/leaky_relu-inl.h          |  2 +-
 tests/python/unittest/test_operator.py | 67 ++++++++++++++++++++--------------
 2 files changed, 41 insertions(+), 28 deletions(-)
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index 1d2baa4..fe26689 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -301,7 +301,7 @@ class LeakyReLUOp : public Operator {
     TShape result(dst.ndim());
     int s = src.ndim() - 1;
     for (int i = dst.ndim() - 1; i >= 0; i--) {
-      if (s >= 0 && (dst[i] == src[s] || src[s] == 1)) {
+      if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) {
         result[i] = src[s];
         s--;
       } else {
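
The added i <= 1 guard limits the match between gamma's shape and the data shape to the first two axes (batch and channel). A rough Python rendering of the loop above, assuming the elided else branch fills the remaining axes with 1, shows the effect for data shape (3,4,4,5) and gamma shape (4,):

def expand_gamma_shape(dst, src, fixed=True):
    # dst: data shape, src: gamma shape; sketch of the C++ expansion loop above.
    result = [0] * len(dst)
    s = len(src) - 1
    for i in range(len(dst) - 1, -1, -1):
        if s >= 0 and (not fixed or i <= 1) and (dst[i] == src[s] or src[s] == 1):
            result[i] = src[s]
            s -= 1
        else:
            result[i] = 1  # assumption: the elided else branch inserts a broadcast axis
    return tuple(result)

print(expand_gamma_shape((3, 4, 4, 5), (4,), fixed=False))  # (1, 1, 4, 1): gamma lands on the H axis
print(expand_gamma_shape((3, 4, 4, 5), (4,), fixed=True))   # (1, 4, 1, 1): gamma lands on the channel axis

Without the guard, a (4,) gamma lines up with the height axis of (3,4,4,5) data; with it, gamma expands to (1, 4, 1, 1) and is broadcast per channel.
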
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index a7f484e..b5a7303 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -772,48 +772,61 @@ def test_prelu():
     def fprelu(x, gamma):
         pos_indices = x > 0
         out = x.copy()
-        out = np.multiply(out, gamma)
+        if len(x.shape) == 4:
+            out = out.transpose(2,3,0,1)
+            out = np.multiply(out, gamma)
+            out = out.transpose(2,3,0,1)
+        else:
+            out = np.multiply(out, gamma)
         out[pos_indices] = x[pos_indices]
         return out
     def fprelu_grad(x, y, gamma):
         pos_indices = x > 0
-        grad_x = np.multiply(np.ones(x.shape), gamma)
+        if len(x.shape) == 4:
+            grad_x = np.multiply(np.ones(x.shape).transpose(2,3,0,1), gamma)
+            grad_x = grad_x.transpose(2,3,0,1)
+        else:
+            grad_x = np.multiply(np.ones(x.shape), gamma)
         grad_gam = np.zeros(gamma.shape)
         copy_x = x.copy()
         copy_x[pos_indices] = 0.0
         grad_x[pos_indices] = 1.0
-        if len(gamma.shape) > 1:
+        if len(gamma.shape) > 1 and len(x.shape) != 4:
             grad_gam = copy_x
+        elif len(gamma.shape) > 1 and len(x.shape) == 4:
+            grad_gam = np.sum(copy_x, axis=(2,3))
         elif gamma.shape[0] == 1:
             grad_gam = np.sum(np.sum(copy_x))
-        elif gamma.shape[0] > 1:
+        elif gamma.shape[0] > 1 and len(x.shape) != 4:
             grad_gam = np.sum(copy_x, axis=0)
+        elif gamma.shape[0] > 1 and len(x.shape) == 4:
+            grad_gam = np.sum(copy_x, axis=(0,2,3))
         return (grad_x, grad_gam)
-    shape = (3,4)
     x = mx.symbol.Variable("x")
     gamma = mx.symbol.Variable("gamma")
-    for dtype in [np.float16, np.float32, np.float64]:
-        for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
-            gam_full = np.array([gam, gam, gam])
-            xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
-            rtol = 1e-2
-            atol = 1e-3
-            eps = 1e-4
-            xa[abs(xa) < eps] = 1.0
-            y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
-            ya = fprelu(xa, gam)
-            ya_full = fprelu(xa, gam_full)
-            g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
-            g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
-            # Skip numeric check for float16 type to get rid of flaky behavior
-            if dtype is not np.float16:
-                check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
-                check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
-            check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
-                                    [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)
+    for shape in [(3,4), (3,4,4,5)]:
+        for dtype in [np.float16, np.float32, np.float64]:
+            for gam in [np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype)]:
+                gam_full = np.array([gam, gam, gam])
+                xa = np.random.uniform(low=-1.0,high=1.0,size=shape).astype(dtype)
+                rtol = 1e-2
+                atol = 1e-3
+                eps = 1e-4
+                xa[abs(xa) < eps] = 1.0
+                y = mx.symbol.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
+                ya = fprelu(xa, gam)
+                ya_full = fprelu(xa, gam_full)
+                g_xa, g_gam = fprelu_grad(xa, ya, gamma=gam)
+                g_xa_full, g_gam_full = fprelu_grad(xa, ya_full, gamma=gam_full)
+                # Skip numeric check for float16 type to get rid of flaky behavior
+                if dtype is not np.float16:
+                    check_numeric_gradient(y, [xa, gam], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+                    check_numeric_gradient(y, [xa, gam_full], numeric_eps=eps, rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_forward(y, [xa, gam], [ya], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_backward(y, [xa, gam], [np.ones(shape), np.ones(gam.shape)], [g_xa, g_gam], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_forward(y, [xa, gam_full], [ya_full], rtol=rtol, atol=atol, dtype=dtype)
+                check_symbolic_backward(y, [xa, gam_full], [np.ones(shape), np.ones(gam_full.shape)],
+                                        [g_xa_full, g_gam_full], rtol=rtol, atol=atol, dtype=dtype)

 @with_seed()
 def test_selu():
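
As a quick sanity check of the fixed broadcasting from Python, the NDArray counterpart of the symbol call used in the test can be exercised directly (a sketch; it assumes mx.nd.LeakyReLU accepts the same data/gamma/act_type arguments as mx.symbol.LeakyReLU):

import mxnet as mx
import numpy as np

# NCHW input with 4 channels and one slope per channel, mirroring the new test case.
x = mx.nd.array(np.random.uniform(-1.0, 1.0, size=(3, 4, 4, 5)))
gamma = mx.nd.array([0.1, 0.2, 0.3, 0.4])

# With the fix, gamma is broadcast along the channel axis (axis 1).
y = mx.nd.LeakyReLU(data=x, gamma=gamma, act_type='prelu')
print(y.shape)   # (3, 4, 4, 5)
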