Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/08/24 17:29:51 UTC

[incubator-mxnet] branch master updated: Fix import error of broadcast max, min, mod in ndarray.py and add unit tests (#7572)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 9296907  Fix import error of broadcast max, min, mod in ndarray.py and add unit tests (#7572)
9296907 is described below

commit 9296907c8343ace2f7ed5cfef757849e63176382
Author: reminisce <wu...@gmail.com>
AuthorDate: Thu Aug 24 10:29:48 2017 -0700

    Fix import error of broadcast max, min, mod in ndarray.py and add unit tests (#7572)
---
 python/mxnet/ndarray/ndarray.py        |   4 +-
 tests/python/unittest/test_operator.py | 132 ++++++++++++++++++++++++++++-----
 2 files changed, 117 insertions(+), 19 deletions(-)
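
For context, the ndarray.py hunk below adds broadcast_minimum, broadcast_maximum, and
broadcast_mod to the operators imported at the top of the module, and adds "modulo" to
__all__. The module-level maximum, minimum, and modulo wrappers dispatch to these
broadcast_* operators when both arguments are NDArrays, so the missing imports broke
those code paths. A minimal usage sketch of the fixed wrappers (values are illustrative
and assume a build that contains this commit):

    import mxnet as mx
    import numpy as np

    a = mx.nd.array([[1., 6.], [3., 4.]])
    b = mx.nd.array([[5., 2.]])   # broadcast along the first axis

    # Each wrapper dispatches to the corresponding broadcast_* operator when
    # both arguments are NDArrays; before this commit those operators were not
    # imported into ndarray.py, so these calls failed with an import error.
    np.testing.assert_allclose(mx.nd.maximum(a, b).asnumpy(), np.maximum(a.asnumpy(), b.asnumpy()))
    np.testing.assert_allclose(mx.nd.minimum(a, b).asnumpy(), np.minimum(a.asnumpy(), b.asnumpy()))
    np.testing.assert_allclose(mx.nd.modulo(a, b).asnumpy(), np.mod(a.asnumpy(), b.asnumpy()))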

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 20ca226..7322325 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -41,11 +41,11 @@ from .op import NDArrayBase, _STORAGE_TYPE_ID_TO_STR
 from . import broadcast_add, broadcast_mul, transpose, broadcast_not_equal, broadcast_power
 from . import broadcast_sub, broadcast_div, broadcast_to, broadcast_equal, cast_storage
 from . import broadcast_greater, broadcast_greater_equal, broadcast_lesser, broadcast_lesser_equal
-from . import zeros_like, slice
+from . import zeros_like, slice, broadcast_minimum, broadcast_maximum, broadcast_mod
 
 __all__ = ["NDArray", "concatenate", "_DTYPE_NP_TO_MX", "_DTYPE_MX_TO_NP", "_GRAD_REQ_MAP",
            "ones", "add", "arange", "divide", "equal", "full", "greater", "greater_equal",
-           "imdecode", "lesser", "lesser_equal", "maximum", "minimum", "moveaxis",
+           "imdecode", "lesser", "lesser_equal", "maximum", "minimum", "moveaxis", "modulo",
            "multiply", "negative", "not_equal", "onehot_encode", "power", "subtract",
            "true_divide", "waitall", "_new_empty_handle"]
 
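The bulk of the commit extends tests/python/unittest/test_operator.py: check_binary_op_forward
gains an optional mx_nd_func argument that cross-checks the symbolic result against the
imperative NDArray operator, and new test_bmax/test_bmin cases cover broadcast_maximum and
broadcast_minimum. In essence, the added checks compare the broadcast operators with their
NumPy counterparts; a condensed sketch of that comparison (not the actual test code, which
follows in the diff below):

    import mxnet as mx
    import numpy as np

    lhs = np.random.random((2, 3, 1)).astype(np.float32)
    rhs = np.random.random((1, 3, 4)).astype(np.float32)

    # Imperative cross-check, analogous to what mx_nd_func does inside
    # check_binary_op_forward for the symbolic executor's output.
    for mx_op, np_op in [(mx.nd.broadcast_maximum, np.maximum),
                         (mx.nd.broadcast_minimum, np.minimum),
                         (mx.nd.broadcast_mod, np.mod)]:
        out = mx_op(mx.nd.array(lhs), mx.nd.array(rhs)).asnumpy()
        np.testing.assert_allclose(out, np_op(lhs, rhs), rtol=1e-5, atol=1e-6)
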
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 11d0ea2..ceb11ed 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -24,6 +24,7 @@ import itertools
 from numpy.testing import assert_allclose, assert_array_equal
 from mxnet.test_utils import *
 
+
 def np_softmax(x, axis=-1):
     # fix for old numpy on Travis not supporting keepdims
     # x = x - np.max(x, axis=-1, keepdims=True)
@@ -58,6 +59,7 @@ def check_elementwise_sum_with_shape(shape, n):
     for a in arr_grad:
         assert_almost_equal(a.asnumpy(), out_grad.asnumpy())
 
+
 def test_elementwise_sum():
     np.random.seed(0)
     nrepeat = 2
@@ -112,6 +114,7 @@ def check_concat_with_shape(shapes, dimension, skip_second):
             np_grad = arr_np[i]
             assert_almost_equal(grad.asnumpy(), np_grad + 1)
 
+
 def test_concat():
     for dimension in range(4):
         n = 2
@@ -158,6 +161,7 @@ def test_concat():
             check_concat_with_shape(shapes,dimension,True)
             check_concat_with_shape(shapes,dimension,False)
 
+
 def test_slice_channel():
     def check_slice_channel(data_ndim, axis, num_outputs, squeeze_axis):
         ins = []
@@ -221,6 +225,7 @@ def check_regression(symbol, forward, backward):
     npout = backward(npout,  arr_label.asnumpy().reshape(npout.shape))
     assert_almost_equal(npout, arr_grad.asnumpy())
 
+
 def test_regression():
     check_regression(mx.symbol.LogisticRegressionOutput,
                      lambda x: 1.0 / (1.0 + np.exp(-x)),
@@ -229,6 +234,7 @@ def test_regression():
                      lambda x: x,
                      lambda x, y : x - y)
 
+
 def check_softmax_with_ignore_label(xpu):
     X = mx.symbol.Variable('X')
     L = mx.symbol.Variable('L')
@@ -261,6 +267,7 @@ def check_softmax_with_ignore_label(xpu):
     assert abs(np.sum(grad1[:int(shape[0]/2)])) < 1e-5
     assert_almost_equal(grad0[int(shape[0]/2):], grad1[int(shape[0]/2):])
 
+
 def check_softmax_with_shape(shape, xpu, preserve_shape=False):
     # bind with label
     X = mx.symbol.Variable('X')
@@ -277,11 +284,13 @@ def check_softmax_with_shape(shape, xpu, preserve_shape=False):
     exec1.backward()
     assert_almost_equal(grad.asnumpy(), np_softmax(x.asnumpy()) - l.asnumpy(), rtol=1e-4)
 
+
 def test_softmax():
     check_softmax_with_shape((3, 4), default_context(), preserve_shape=False)
     check_softmax_with_shape((3, 4), default_context(), preserve_shape=True)
     check_softmax_with_shape((3, 4, 2), default_context(), preserve_shape=True)
 
+
 def test_python_op():
     X = mx.symbol.Variable('X')
     op = mx.operator.NumpyOp()
@@ -296,6 +305,7 @@ def test_python_op():
     exec1.backward(dy)
     assert_almost_equal(dy.asnumpy(), dx.asnumpy())
 
+
 def test_swapaxes():
     data = mx.symbol.Variable('data')
     shape = (2, 3, 4)
@@ -314,6 +324,7 @@ def test_swapaxes():
 
     assert_almost_equal(out, swap_)
 
+
 def test_scalarop():
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -344,6 +355,7 @@ def test_scalar_pow():
     check_symbolic_forward(test, [data_tmp], [data_tmp ** 2])
     check_symbolic_backward(test, [data_tmp], [np.ones(shape)], [2 * data_tmp])
 
+
 def test_symbol_pow():
     shape = (1, 1)
 
@@ -362,6 +374,7 @@ def test_symbol_pow():
     exp_dir = data_tmp**(exp_tmp) * np.log(data_tmp)
     check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)], [data_dir, exp_dir])
 
+
 def test_pow_fn():
     shape = (3, 4)
     exp = mx.symbol.Variable("exp")
@@ -371,6 +384,7 @@ def test_pow_fn():
     check_symbolic_forward(y, [x], [2**x])
     check_symbolic_backward(y, [x], [np.ones(shape)], [np.log(2) * 2**x])
 
+
 def test_relu():
     def frelu(x):
         return np.maximum(x, 0.0)
@@ -386,6 +400,7 @@ def test_relu():
     check_symbolic_forward(y, [xa], [ya])
     check_symbolic_backward(y, [xa], [np.ones(shape)], [ga])
 
+
 def test_sigmoid():
     def fsigmoid(a):
         return np.divide(1.0, (1.0 + np.exp(-a)))
@@ -398,6 +413,7 @@ def test_sigmoid():
     check_symbolic_forward(y, [xa], [ya])
     check_symbolic_backward(y, [xa], [np.ones(shape)], [ya * (1 - ya)])
 
+
 def test_binary_logic():
     def _inner_test(forward_gt, logic_sym, x_shape, y_shape, test_scalar=True):
         x = mx.symbol.Variable("x")
@@ -453,6 +469,7 @@ def test_binary_logic():
                 logic_sym=lambda x, y: mx.sym.broadcast_not_equal(x, y),
                 x_shape=(1, 10), y_shape=(10, 1), test_scalar=False)
 
+
 def test_embedding():
     in_dim = 10
     out_dim = 4
@@ -479,6 +496,7 @@ def test_embedding():
     exe_test.backward([grad])
     assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(np_onehot.T, np_grad))
 
+
 # check ops handle duplicate input correctly.
 def test_binary_op_duplicate_input():
     data = mx.symbol.Variable('data')
@@ -497,6 +515,7 @@ def test_binary_op_duplicate_input():
     exe_square.backward(out_grad)
     assert_almost_equal(arr_grad.asnumpy(), 2.0 * data_tmp)
 
+
 def test_sign():
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -520,6 +539,7 @@ def test_sign():
     exe_test.backward(out_grad)
     assert_almost_equal(arr_grad.asnumpy(), npout_grad)
 
+
 def test_round_ceil_floor():
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -536,6 +556,7 @@ def test_round_ceil_floor():
     npout = np.round(data_tmp) + np.ceil(data_tmp) + np.floor(data_tmp)
     assert_almost_equal(out, npout)
 
+
 def test_trunc():
     data_tmp = np.random.rand(3, 4) * 10 - 5
     arr_data = mx.nd.array(data_tmp)
@@ -549,6 +570,7 @@ def test_trunc():
 
     assert_almost_equal(out, npout)
 
+
 def test_rsqrt_cos_sin():
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -572,6 +594,7 @@ def test_rsqrt_cos_sin():
     exe_test.backward(out_grad)
     assert_almost_equal(arr_grad.asnumpy(), npout_grad)
 
+
 def test_maximum_minimum():
     data1 = mx.symbol.Variable('data')
     data2 = mx.symbol.Variable('data')
@@ -584,11 +607,9 @@ def test_maximum_minimum():
     arr_data1 = mx.nd.array(data_tmp1)
     arr_data2 = mx.nd.array(data_tmp2)
 
-
     arr_grad1 = mx.nd.empty(shape)
     arr_grad2 = mx.nd.empty(shape)
 
-
     test =  mx.sym.maximum(data1,data2) + mx.sym.minimum(data1,data2);
     exe_test = test.bind(default_context(), args=[arr_data1,arr_data2], args_grad=[arr_grad1,arr_grad2])
     exe_test.forward(is_train=True)
@@ -610,6 +631,7 @@ def test_maximum_minimum():
     assert_almost_equal(arr_grad1.asnumpy(), npout_grad1)
     assert_almost_equal(arr_grad2.asnumpy(), npout_grad2)
 
+
 def test_maximum_minimum_scalar():
     data1 = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -640,6 +662,7 @@ def test_maximum_minimum_scalar():
 
     assert_almost_equal(arr_grad1.asnumpy(), npout_grad1)
 
+
 def test_abs():
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -663,6 +686,7 @@ def test_abs():
     exe_test.backward(out_grad)
     assert_almost_equal(arr_grad.asnumpy(), npout_grad)
 
+
 def check_deconvolution_forward_backward(input_shape, num_filter, kernel, stride, pad):
     """configure A: input --> conv --> deconv --> output.
        the convolution and deconvoluiton has similar parameter which ensure
@@ -761,6 +785,7 @@ def check_deconvolution_gradient(input_shape, num_filter, pad):
     assert_almost_equal(conv_args_grad[1].asnumpy() + deconv_addto_args_grad_npy[1],
                         deconv_addto_args_grad[1].asnumpy(), rtol=1e-3, atol=1e-2)
 
+
 def check_deconvolution_target_shape(input_shape, kernel, stride, pad, adj, target_shape=None):
     data = mx.sym.Variable(name="data")
     if target_shape:
@@ -774,6 +799,7 @@ def check_deconvolution_target_shape(input_shape, kernel, stride, pad, adj, targ
     arg_shapes, out_shapes, _ = deconv.infer_shape(data=input_shape)
     assert out_shapes[0] == (input_shape[0], 5, 8, 8)
 
+
 def test_deconvolution():
     check_deconvolution_target_shape(
         input_shape         = (2,3,4,4),
@@ -822,6 +848,7 @@ def test_deconvolution():
         pad = (3,3)
     )
 
+
 def check_nearest_upsampling_with_shape(shapes, scale, root_scale):
     arr = {'arg_%d'%i: mx.random.uniform(-10.0, 10.0, shape, ctx=mx.cpu()).copyto(default_context()) for i, shape in zip(range(len(shapes)), shapes)}
     arr_grad = {'arg_%d'%i: mx.nd.zeros(shape) for i, shape in zip(range(len(shapes)), shapes)}
@@ -834,6 +861,7 @@ def check_nearest_upsampling_with_shape(shapes, scale, root_scale):
         name = 'arg_%d'%k
         assert_allclose(arr[name].asnumpy()*root_scale**2*scale**(2*k), arr_grad[name].asnumpy(), rtol=1e-4)
 
+
 def check_bilinear_upsampling_with_shape(shapes, scale, root_scale):
     arr = {'arg_%d'%i: mx.random.uniform(-10.0, 10.0, shape, ctx=mx.cpu()).copyto(default_context()) for i, shape in zip(range(len(shapes)), shapes)}
     arr_grad = {'arg_%d'%i: mx.nd.zeros(shape) for i, shape in zip(range(len(shapes)), shapes)}
@@ -846,6 +874,7 @@ def check_bilinear_upsampling_with_shape(shapes, scale, root_scale):
         name = 'arg_%d'%k
         assert_allclose(arr[name].asnumpy()*root_scale**2*scale**(2*k), arr_grad[name].asnumpy(), rtol=1e-4)
 
+
 def test_nearest_upsampling():
     for root_scale in [1,2,3]:
         for scale in [1,2,3]:
@@ -854,6 +883,7 @@ def test_nearest_upsampling():
                     shapes = [(1,3,base*root_scale*scale**(num_shape-1-i),base*root_scale*scale**(num_shape-1-i)) for i in range(num_shape)]
                     check_nearest_upsampling_with_shape(shapes, scale, root_scale)
 
+
 def test_batchnorm_training():
     def check_batchnorm_training(stype):
         for shape in [(2, 3), (2, 3, 2, 2)]:
@@ -938,6 +968,7 @@ def test_batchnorm_training():
     for stype in stypes:
         check_batchnorm_training(stype)
 
+
 def test_convolution_grouping():
     num_filter = 4
     num_group = 2
@@ -1006,6 +1037,7 @@ def test_depthwise_convolution():
                     for arr1, arr2 in zip(exe1.outputs + exe1.grad_arrays, exe2.outputs + exe2.grad_arrays):
                         np.testing.assert_allclose(arr1.asnumpy(), arr2.asnumpy(), rtol=1e-3, atol=1e-4)
 
+
 def gen_broadcast_data(idx):
     # Manually set test cases
     binary_op_data_shape = np.array(
@@ -1061,27 +1093,35 @@ def gen_broadcast_data(idx):
         r_shape[np.where(r_axis_flags == 0)] = 1
     return [np.random.random(l_shape), np.random.random(r_shape)]
 
+
 def gen_broadcast_data_int(idx):
     d = gen_broadcast_data(idx);
     return [np.round(d[0]*100).astype(int), np.round(d[1]*100).astype(int)]
 
+
 def gen_binary_data(dummy):
     ndim = np.random.randint(1, 6)
     shape = np.random.randint(1, 6, size=(ndim,))
     return [np.random.random(shape), np.random.random(shape)]
 
+
 def gen_binary_data_int(dummy):
     d = gen_binary_data(dummy);
     return [np.round(d[0]*100).astype(int), np.round(d[1]*100).astype(int)]
 
-def check_binary_op_forward(symbol, baseline, gen_data, rtol=1e-3, atol=1e-5):
+
+def check_binary_op_forward(symbol, baseline, gen_data, rtol=1e-3, atol=1e-5, mx_nd_func=None):
     sample_num = 200
     for i in range(sample_num):
         d = gen_data(i)
         x = baseline(d[0], d[1])
-        y = symbol.bind(default_context(), args={'a': mx.nd.array(d[0]), 'b' : mx.nd.array(d[1])})
+        y = symbol.bind(default_context(), args={'a': mx.nd.array(d[0]), 'b': mx.nd.array(d[1])})
         y.forward(is_train=True)
         y = y.outputs[0].asnumpy()
+        if mx_nd_func is not None:
+            d0 = mx.nd.array(d[0], dtype=d[0].dtype)
+            d1 = mx.nd.array(d[1], dtype=d[1].dtype)
+            assert_almost_equal(y, mx_nd_func(d0, d1).asnumpy(), rtol=rtol, atol=atol)
         idx = np.abs(x-y) > atol+rtol*np.abs(x)
         if idx.any():
             print('found precision problem')
@@ -1097,11 +1137,13 @@ def check_binary_op_forward(symbol, baseline, gen_data, rtol=1e-3, atol=1e-5):
             print('diff: {}'.format(np.abs(x-y)[idx] - atol-rtol*np.abs(x)[idx]))
         assert_allclose(y, x, rtol=rtol, atol=atol)
 
+
 def check_binary_op_backward(symbol, baseline, gen_data, rtol=1e-3, atol=1e-5):
     sample_num = 200
     for i in range(sample_num):
         d = gen_data(i)
         out = np.random.random((d[0] + d[1]).shape)
+
         def reduce_op(shape, x):
             if shape == x.shape:
                 return x
@@ -1111,18 +1153,20 @@ def check_binary_op_backward(symbol, baseline, gen_data, rtol=1e-3, atol=1e-5):
                     keepdims_shape[i] = 1
                     x = np.sum(x, axis=i).reshape(keepdims_shape)
             return x
+
         baseline_grad1, baseline_grad2 = baseline(out, d[0], d[1])
         x_1 = reduce_op(d[0].shape, baseline_grad1)
         x_2 = reduce_op(d[1].shape, baseline_grad2)
         y_1 = mx.nd.empty(d[0].shape)
         y_2 = mx.nd.empty(d[1].shape)
-        y = symbol.bind(default_context(), args={'a': mx.nd.array(d[0]), 'b' : mx.nd.array(d[1])},
+        y = symbol.bind(default_context(), args={'a': mx.nd.array(d[0]), 'b': mx.nd.array(d[1])},
                         args_grad=[y_1, y_2])
         y.forward(is_train=True)
         y.backward([mx.nd.array(out)])
         assert_allclose(y_1.asnumpy(), x_1, rtol=rtol, atol=atol)
         assert_allclose(y_2.asnumpy(), x_2, rtol=rtol, atol=atol)
 
+
 def test_binary_op():
     a = mx.sym.Variable('a')
     b = mx.sym.Variable('b')
@@ -1177,51 +1221,65 @@ def test_binary_op():
     test_bpow(a, b)
     test_bneq(a, b)
 
+
 def test_broadcast_binary_op():
     a = mx.sym.Variable('a')
     b = mx.sym.Variable('b')
 
     def test_bplus(a, b):
         c = mx.sym.broadcast_plus(a, b)
-        check_binary_op_forward(c, lambda a, b: a + b, gen_broadcast_data)
+        check_binary_op_forward(c, lambda a, b: a + b, gen_broadcast_data, mx_nd_func=mx.nd.add)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out, g_out), gen_broadcast_data)
 
     def test_bminus(a, b):
         c = mx.sym.broadcast_minus(a, b)
-        check_binary_op_forward(c, lambda a, b: a - b, gen_broadcast_data)
+        check_binary_op_forward(c, lambda a, b: a - b, gen_broadcast_data, mx_nd_func=mx.nd.subtract)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out, - g_out), gen_broadcast_data)
 
     def test_bmul(a, b):
         c = mx.sym.broadcast_mul(a, b)
-        check_binary_op_forward(c, lambda a, b: a * b, gen_broadcast_data)
+        check_binary_op_forward(c, lambda a, b: a * b, gen_broadcast_data, mx_nd_func=mx.nd.multiply)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out * b, g_out * a), gen_broadcast_data)
 
     def test_bdiv(a, b):
         c = mx.sym.broadcast_div(a, b)
-        check_binary_op_forward(c, lambda a, b: a / b, gen_broadcast_data)
+        check_binary_op_forward(c, lambda a, b: a / b, gen_broadcast_data, mx_nd_func=mx.nd.divide)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out / b, - g_out * a / (b * b)), gen_broadcast_data)
 
     def test_bmod(a, b):
         c = mx.sym.broadcast_mod(a, b)
-        check_binary_op_forward(c, lambda a, b: a % b, gen_broadcast_data, atol=1)
+        check_binary_op_forward(c, lambda a, b: a % b, gen_broadcast_data, atol=1, mx_nd_func=mx.nd.modulo)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out, - g_out * (a // b)), gen_broadcast_data, atol=1)
 
     def test_bmod_int(a, b):
         c = mx.sym.broadcast_mod(mx.sym.cast(a, dtype='int32'), mx.sym.cast(b, dtype='int32'))
-        check_binary_op_forward(c, lambda a, b: a % b, gen_broadcast_data_int)
+        check_binary_op_forward(c, lambda a, b: a % b, gen_broadcast_data_int, mx_nd_func=mx.nd.modulo)
         check_binary_op_backward(c, lambda g_out, a, b: (np.zeros_like(a), np.zeros_like(b)), gen_broadcast_data_int)
 
     def test_bpow(a, b):
         c = mx.sym.broadcast_power(a, b)
-        check_binary_op_forward(c, lambda a, b: a ** b, gen_broadcast_data)
+        check_binary_op_forward(c, lambda a, b: a ** b, gen_broadcast_data, mx_nd_func=mx.nd.power)
         check_binary_op_backward(c, lambda g_out, a, b: (g_out * a **(b - 1) * b,
                                         g_out * a ** b * np.log(a)), gen_broadcast_data)
 
     def test_bequal(a, b):
         c = mx.sym.broadcast_equal(a, b)
-        check_binary_op_forward(c, lambda a, b: (a == b).astype(a.dtype), gen_broadcast_data_int)
+        check_binary_op_forward(c, lambda a, b: (a == b).astype(a.dtype), gen_broadcast_data_int,
+                                mx_nd_func=mx.nd.equal)
         check_binary_op_backward(c, lambda g_out, a, b: (np.zeros_like(a), np.zeros_like(b)), gen_broadcast_data_int)
 
+    def test_bmax(a, b):
+        c = mx.sym.broadcast_maximum(a, b)
+        check_binary_op_forward(c, lambda x, y: np.maximum(x, y), gen_broadcast_data, mx_nd_func=mx.nd.maximum)
+        # pass idx=200 to gen_broadcast_data so that generated ndarrays' sizes are not too big
+        check_numeric_gradient(c, gen_broadcast_data(idx=200), rtol=1e-2, atol=1e-3)
+
+    def test_bmin(a, b):
+        c = mx.sym.broadcast_minimum(a, b)
+        check_binary_op_forward(c, lambda x, y: np.minimum(x, y), gen_broadcast_data, mx_nd_func=mx.nd.minimum)
+        # pass idx=200 to gen_broadcast_data so that generated ndarrays' sizes are not too big
+        check_numeric_gradient(c, gen_broadcast_data(idx=200), rtol=1e-2, atol=1e-3)
+
     test_bplus(a, b)
     test_bminus(a, b)
     test_bmul(a, b)
@@ -1230,6 +1288,9 @@ def test_broadcast_binary_op():
     test_bmod_int(a, b)
     test_bpow(a, b)
     test_bequal(a, b)
+    test_bmax(a, b)
+    test_bmin(a, b)
+
 
 def test_run_convolution_dilated_impulse_response(dil=(1,1), kernel_shape=(3,3), verbose=False):
     # Input for spike response
@@ -1238,7 +1299,6 @@ def test_run_convolution_dilated_impulse_response(dil=(1,1), kernel_shape=(3,3),
     spike_img = mx.nd.array(spike_imgs)
     spike_img2 = mx.nd.array(spike_imgs)
 
-
     kernel_weights = mx.nd.ones(shape=tuple([1,1]+list(kernel_shape)), dtype=np.float32)
     kernel_weights2 = mx.nd.ones(shape=tuple([1,1]+list(kernel_shape)), dtype=np.float32)
 
@@ -1374,6 +1434,7 @@ def test_reshape():
     exe.backward(out_grads=[mx.nd.array(out_grad_npy, ctx=default_context())])
     assert_allclose(exe.grad_arrays[0].asnumpy(), out_grad_npy.reshape((5, 4, 3, 7)))
 
+
 def test_reduce():
     sample_num = 500
     def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym, nan_prob = 0):
@@ -1490,6 +1551,7 @@ def test_broadcast():
         test_broadcasting_ele(sym_bcast_axis)
         test_broadcasting_ele(sym_bcast_to)
 
+
 def test_transpose():
     for ndim in range(1, 7):
         for t in range(5):
@@ -1589,6 +1651,7 @@ def test_slice_axis():
             xx[idx] = x.asnumpy()[idx]
             assert_allclose(xx + x_grad_npy, xgrad.asnumpy(), atol=1E-5)
 
+
 def test_flip():
     for ndim in range(1, 6):
         for t in range(5):
@@ -1677,18 +1740,22 @@ def test_dot(ctx=default_context()):
         x = mx.sym.Variable('x', dtype=data_type)
         y = mx.sym.Variable('y', dtype=data_type)
         return mx.sym.dot(x, y)
+
     def dot_sym_xT(data_type):
         x = mx.sym.Variable('x', dtype=data_type)
         y = mx.sym.Variable('y', dtype=data_type)
         return mx.sym.dot(x, y, transpose_a=True)
+
     def dot_sym_yT(data_type):
         x = mx.sym.Variable('x', dtype=data_type)
         y = mx.sym.Variable('y', dtype=data_type)
         return mx.sym.dot(x, y, transpose_b=True)
+
     def dot_sym_xT_yT(data_type):
         x = mx.sym.Variable('x', dtype=data_type)
         y = mx.sym.Variable('y', dtype=data_type)
         return mx.sym.dot(x, y, transpose_a=True, transpose_b=True)
+
     for data_type in dtypes:
         for ashape, bshape in [((3, 4), (4, 5)), ((2, 3, 4), (4, 5, 6))]:
             m1_npy = np.random.uniform(-1, 1, ashape)
@@ -1700,6 +1767,7 @@ def test_dot(ctx=default_context()):
             check_numeric_gradient(dot_sym_yT(data_type), [m1_npy, m2_npy.T], numeric_eps=1e-1, rtol=2e-2, atol=1e-3)
             check_numeric_gradient(dot_sym_xT_yT(data_type), [m1_npy.T, m2_npy.T], numeric_eps=1e-1, rtol=2e-2, atol=1e-3)
 
+
 def test_batch_dot():
     dtypes = ['float32', 'float64']
 
@@ -1756,6 +1824,7 @@ def test_batch_dot():
                             assert_almost_equal(exe_add.grad_dict['b'].asnumpy(),
                                 bgrad_npy + b_init_grad_npy, rtol=1e-3, atol=1e-4)
 
+
 def get_correlation(data1,data2,kernel_size,max_displacement,stride1,stride2,pad_size,is_multiply):
 
     img1 = mx.sym.Variable('img1')
@@ -1763,6 +1832,7 @@ def get_correlation(data1,data2,kernel_size,max_displacement,stride1,stride2,pad
     return mx.sym.Correlation(data1=img1,data2=img2,kernel_size =kernel_size,max_displacement = max_displacement,
                               stride1 = stride1,stride2 = stride2,pad_size= pad_size,is_multiply = is_multiply)
 
+
 def correlation_forward(data1,data2,pad_size,kernel_size,stride1,stride2,max_displacement,is_multiply):
 
     # compute output's dimension
@@ -1810,6 +1880,7 @@ def correlation_forward(data1,data2,pad_size,kernel_size,stride1,stride2,max_dis
     out /= float(kernel_size**2*data1.shape[1])
     return out,tmp1,tmp2
 
+
 def correlation_backward(out_grad,tmp1,tmp2,data1,data2,pad_size,kernel_size,stride1,stride2,max_displacement,is_multiply):
 
     # compute output's dimension
@@ -1859,6 +1930,7 @@ def correlation_backward(out_grad,tmp1,tmp2,data1,data2,pad_size,kernel_size,str
     tmp2_grad = tmp2_grad / float(kernel_size**2*data1.shape[1])
     return tmp1_grad[:,:,pad_size:pad_size+data1.shape[2],pad_size:pad_size+data1.shape[3]],tmp2_grad[:,:,pad_size:pad_size+data1.shape[2],pad_size:pad_size+data1.shape[3]],
 
+
 def unittest_correlation(data_shape,kernel_size,max_displacement,stride1,stride2,pad_size,is_multiply):
 
     img1 = np.random.random(data_shape)
@@ -1891,8 +1963,8 @@ def unittest_correlation(data_shape,kernel_size,max_displacement,stride1,stride2
     assert_almost_equal(exe1.grad_dict['img1'].asnumpy(), grad1, rtol=1e-3, atol=1e-4)
     assert_almost_equal(exe1.grad_dict['img2'].asnumpy(), grad2, rtol=1e-3, atol=1e-4)
 
-def test_correlation():
 
+def test_correlation():
     unittest_correlation((1,3,10,10), kernel_size = 1,max_displacement = 4,stride1 = 1,stride2 = 1,pad_size = 4,is_multiply = False)
     unittest_correlation((5,1,15,15), kernel_size = 1,max_displacement = 5,stride1 = 1,stride2 = 1,pad_size = 5,is_multiply = False)
     unittest_correlation((5,1,15,15), kernel_size = 1,max_displacement = 5,stride1 = 1,stride2 = 1,pad_size = 5,is_multiply = True)
@@ -1932,6 +2004,7 @@ def test_support_vector_machine_l1_svm():
 
     assert_almost_equal(grad_np, grad.asnumpy())
 
+
 def test_support_vector_machine_l2_svm():
     xpu = default_context()
     shape = (20, 10)
@@ -1979,6 +2052,7 @@ def test_roipooling():
                            grad_nodes={'data':'add', 'rois':'null'},
                            numeric_eps=1e-4, rtol=1e-1, atol=1E-4)
 
+
 def check_pad_with_shape(shape, xpu, pad_width, mode):
     # bind with label
     X = mx.symbol.Variable('X')
@@ -1997,6 +2071,7 @@ def check_pad_with_shape(shape, xpu, pad_width, mode):
     # grad check
     check_numeric_gradient(Y, [x.asnumpy()], numeric_eps=1e-2, rtol=1e-2)
 
+
 def test_pad():
     shape1 = (2, 3, 3, 5)
     pad1 = (0, 0, 0, 0, 1, 2, 3, 4)
@@ -2009,6 +2084,7 @@ def test_pad():
     check_pad_with_shape(shape1, default_context(), pad1, 'reflect')
     check_pad_with_shape(shape2, default_context(), pad2, 'reflect')
 
+
 def np_instance_norm(data, weight, bias, eps):
     spatial_dims = data.shape[2::]
     num_spatial_vals = np.prod(np.array(spatial_dims))
@@ -2025,6 +2101,7 @@ def np_instance_norm(data, weight, bias, eps):
     biasBatch = np.reshape(np.repeat(biasBatch, num_spatial_vals), data.shape)
     return weightBatch * (data - mean)/np.sqrt(var + eps) + biasBatch
 
+
 def check_instance_norm_with_shape(shape, xpu):
     # bind with label
     eps = 0.001
@@ -2045,12 +2122,14 @@ def check_instance_norm_with_shape(shape, xpu):
     check_numeric_gradient(Y, {'X':x.asnumpy(), 'G':gamma.asnumpy(), 'B':beta.asnumpy()},
                            numeric_eps=1e-2, rtol=1e-2, atol=1e-2)
 
+
 def test_instance_normalization():
     check_instance_norm_with_shape((1, 1, 1), default_context())
     check_instance_norm_with_shape((2, 1, 2), default_context())
     check_instance_norm_with_shape((2,4,5,6), default_context())
     check_instance_norm_with_shape((3,3,2,3,2,1,1), default_context())
 
+
 def check_l2_normalization(in_shape, mode, ctx=default_context(), norm_eps=1e-10):
     data = mx.symbol.Variable('data')
     out = mx.symbol.L2Normalization(data=data, mode=mode, eps=norm_eps)
@@ -2084,6 +2163,7 @@ def check_l2_normalization(in_shape, mode, ctx=default_context(), norm_eps=1e-10
     # check gradient
     check_numeric_gradient(out, [in_data], numeric_eps=1e-3, rtol=1e-2, atol=1e-3)
 
+
 def test_l2_normalization():
     for mode in ['channel', 'spatial', 'instance']:
         for nbatch in [1, 4]:
@@ -2093,6 +2173,7 @@ def test_l2_normalization():
                     for width in [5, 7]:
                         check_l2_normalization((nbatch, nchannel, height, width), mode)
 
+
 def sequence_mask_numpy(array, lengths, value):
     arrayMask = array.copy()
     shape = array.shape
@@ -2101,6 +2182,7 @@ def sequence_mask_numpy(array, lengths, value):
         arrayMask[int(lengths[i]):, i] = value
     return arrayMask
 
+
 def check_sequence_mask(shape, xpu, mask_value):
     # bind with label
     X = mx.symbol.Variable('X')
@@ -2123,12 +2205,14 @@ def check_sequence_mask(shape, xpu, mask_value):
     check_numeric_gradient(Y, [x.asnumpy(), l.asnumpy()], grad_nodes={'X':'write'},
         numeric_eps=1e-3, rtol=1e-2)
 
+
 def test_sequence_mask():
     shape1 = (4, 2, 2, 3)
     shape2 = (1, 2, 2, 3, 1, 1)
     check_sequence_mask(shape1, default_context(), 2.1)
     check_sequence_mask(shape2, default_context(), 0.1)
 
+
 def check_sequence_reverse(xpu):
 
     # sample data
@@ -2192,6 +2276,7 @@ def check_sequence_reverse(xpu):
 def test_sequence_reverse():
     check_sequence_reverse(mx.cpu())
 
+
 def mathematical_core_binary(name,
                              forward_mxnet_call,
                              forward_numpy_call,
@@ -2236,6 +2321,7 @@ def mathematical_core_binary(name,
     assert_almost_equal(arr_grad1, npout_grad1)
     assert_almost_equal(arr_grad2, npout_grad2)
 
+
 def mathematical_core(name, forward_mxnet_call, forward_numpy_call, backward_numpy_call, data_init=5., grad_init=2.):
     data = mx.symbol.Variable('data')
     shape = (3, 4)
@@ -2264,6 +2350,7 @@ def mathematical_core(name, forward_mxnet_call, forward_numpy_call, backward_num
     # print(npout_grad)
     assert_almost_equal(arr_grad, npout_grad)
 
+
 def test_special_functions_using_scipy():
     try:
         from scipy import special as scipy_special
@@ -2294,6 +2381,7 @@ def rounding(name, forward_mxnet_call, forward_numpy_call, data_init=5., grad_in
     npout = forward_numpy_call(data_tmp)
     assert_almost_equal(out, npout)
 
+
 def test_mathematical():
     # rsqrt
     mathematical_core("rsqrt",
@@ -2380,6 +2468,7 @@ def test_mathematical():
     # fix
     rounding("fix", lambda x: mx.sym.fix(x), lambda x: np.fix(x))
 
+
 def test_special_functions_using_scipy():
     try:
         from scipy import special as scipy_special
@@ -2395,6 +2484,7 @@ def test_special_functions_using_scipy():
     mathematical_core("gammaln", lambda x: mx.sym.gammaln(x), lambda x: scipy_special.gammaln(x),
                      lambda x: scipy_special.psi(x), 0.5, 0.5)
 
+
 def test_clip():
     data = mx.symbol.Variable('data')
     shape = (30, 30)
@@ -2404,6 +2494,7 @@ def test_clip():
     check_symbolic_backward(test, [data_tmp], [np.ones(shape)],
                             [np.where(data_tmp < 0.6, [1], [0]) * np.where(data_tmp > -0.6, [1], [0])])
 
+
 def test_init():
     def test_basic_val_init(sym_func, np_func, shape, dtype):
         x = sym_func(shape=shape, dtype=dtype)
@@ -2411,6 +2502,7 @@ def test_init():
         exe.forward(is_train=True)
         assert_almost_equal(exe.outputs[0].asnumpy(), np_func(shape=shape, dtype=dtype))
         assert exe.outputs[0].asnumpy().dtype == dtype
+
     def test_arange():
         for i in range(5):
             start = np.random.rand() * 10
@@ -2432,6 +2524,7 @@ def test_init():
 
 def test_order():
     ctx = default_context()
+
     def gt_topk(dat, axis, ret_typ, k, is_ascend):
         if ret_typ == "indices":
             if is_ascend:
@@ -2538,6 +2631,7 @@ def test_blockgrad():
     assert_almost_equal(exe.outputs[0].asnumpy(), a_npy)
     exe.backward()  # No error if BlockGrad works
 
+
 def test_take():
     def check_output_n_grad(data_shape, idx_shape):
         exe = result.simple_bind(default_context(), a=data_shape,
@@ -2691,7 +2785,6 @@ def test_bilinear_sampler():
                             +(1-xWeightTopLeft) * (1-yWeightTopLeft) * inBottomRight
         return out
 
-
     def bilinear_backward_numpy(out_grad, data, grid):
 
         data_grad = np.zeros(data.shape, dtype=np.float32)
@@ -2802,6 +2895,7 @@ def test_bilinear_sampler():
             assert_almost_equal(exe_addto.grad_dict['data'].asnumpy(), data_grad + data_initial_grid, rtol=1e-3,atol=1e-5)
             assert_almost_equal(exe_addto.grad_dict['grid'].asnumpy(), grid_grad + grid_initial_grid, rtol=1e-3,atol=1e-5)
 
+
 def test_index2d():
     for _ in range(30):
         n = np.random.randint(1, 100)
@@ -2811,6 +2905,7 @@ def test_index2d():
         r = mx.nd.batch_take(data, x)
         assert_almost_equal(r.asnumpy(), data.asnumpy()[np.arange(n), x.asnumpy()])
 
+
 def test_cast():
     for srctype in [np.int32, np.float32, np.float16]:
         for dsttype in [np.float32, np.int32, np.float16]:
@@ -3277,6 +3372,7 @@ def check_ctc_loss(acts, labels, loss_truth):
     # test grad
     check_numeric_gradient(ctc, [acts, labels], grad_nodes=['input'], rtol=0.05, atol=1e-3)
 
+
 def test_ctc_loss():
     # Test 1: check that batches are same + check against Torch WarpCTC
     acts = np.array([
@@ -3310,6 +3406,7 @@ def test_quantization_op():
     assert same(qa.asnumpy(), qa_real.asnumpy())
     assert same(a_.asnumpy(),  a_real.asnumpy())
 
+
 def test_reciprocal_op():
     data_tmp = np.random.rand(3, 4) * 10 - 5
     # Avoid possible division by 0 errors
@@ -3320,6 +3417,7 @@ def test_reciprocal_op():
     check_numeric_gradient(test, [data_tmp])
     check_symbolic_forward(test, [data_tmp], [np.reciprocal(data_tmp)])
 
+
 def test_custom_op():
     class Sqr(mx.operator.CustomOp):
         def forward(self, is_train, req, in_data, out_data, aux):
@@ -3391,6 +3489,7 @@ def test_psroipooling():
                         check_numeric_gradient(op, [im_data, rois_data], rtol=rtol, atol=atol,
                                                grad_nodes=grad_nodes, ctx=mx.gpu(0))
 
+
 def test_deformable_convolution():
     for num_batch in [1, 2]:
         for num_channel_data, num_deformable_group in itertools.product([4, 8], [1, 2]):
@@ -3461,7 +3560,6 @@ def test_deformable_psroipooling():
                                                grad_nodes=grad_nodes, ctx=mx.gpu(0))
 
 
-
 def test_laop():
 
     # enable numerical checking of gradients

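Unlike the other broadcast cases, the new test_bmax and test_bmin rely on check_numeric_gradient
rather than an analytic backward baseline. For reference, the analytic subgradient is simple to
state: the incoming gradient flows to whichever input is larger at each position. A hand-rolled
check along those lines (illustrative only; none of the names below are part of the commit, and
ties are assumed to be measure-zero for random float inputs):

    import mxnet as mx
    import numpy as np

    a = mx.sym.Variable('a')
    b = mx.sym.Variable('b')
    c = mx.sym.broadcast_maximum(a, b)

    a_np = np.random.random((3, 4)).astype(np.float32)
    b_np = np.random.random((3, 4)).astype(np.float32)
    a_grad, b_grad = mx.nd.empty((3, 4)), mx.nd.empty((3, 4))

    exe = c.bind(mx.cpu(), args={'a': mx.nd.array(a_np), 'b': mx.nd.array(b_np)},
                 args_grad=[a_grad, b_grad])
    exe.forward(is_train=True)
    exe.backward([mx.nd.ones((3, 4))])

    # Subgradient of max: the gradient goes to the larger input elementwise.
    mask = (a_np >= b_np).astype(np.float32)
    np.testing.assert_allclose(a_grad.asnumpy(), mask, rtol=1e-5, atol=1e-6)
    np.testing.assert_allclose(b_grad.asnumpy(), 1.0 - mask, rtol=1e-5, atol=1e-6)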
-- 
To stop receiving notification emails like this one, please contact
"commits@mxnet.apache.org" <co...@mxnet.apache.org>.