Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/01/15 18:55:59 UTC

[GitHub] [incubator-mxnet] access2rohit commented on a change in pull request #17305: grouping large array tests based on type and updating nightly CI function

access2rohit commented on a change in pull request #17305: grouping large array tests based on type and updating nightly CI function
URL: https://github.com/apache/incubator-mxnet/pull/17305#discussion_r367048895
 
 

 ##########
 File path: tests/nightly/test_large_array.py
 ##########
 @@ -35,1658 +35,1627 @@
 LARGE_SIZE = LARGE_X * SMALL_Y
 
 
-def test_gluon_embedding():
-    m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X)
-    m.initialize()
-    a = nd.zeros((MEDIUM_X, SMALL_Y))
-    b = m(a)
-    assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X)
-    assert b.asnumpy().size == LARGE_SIZE
-
-
-def test_ndarray_zeros():
-    a = nd.zeros(shape=(LARGE_X, SMALL_Y))
-    assert a[-1][0] == 0
-    assert a.shape == (LARGE_X, SMALL_Y)
-    assert a.size == LARGE_SIZE
-
-
-def test_ndarray_ones():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    assert a[-1][0] == 1
-    assert nd.sum(a).asnumpy() == LARGE_SIZE
-
-
-def test_ndarray_convert():
-    a = nd.zeros(shape=(LARGE_X, SMALL_Y))
-    b = a.astype(np.int32)
-    assert b.dtype == np.int32
-    b = a.tostype('row_sparse')
-    assert isinstance(b, mx.nd.sparse.RowSparseNDArray)
-
-
-@with_seed()
-def test_ndarray_random_uniform():
-    a = nd.random.uniform(shape=(LARGE_X, SMALL_Y))
-    assert a[-1][0] != 0
-
-
-@with_seed()
-def test_ndarray_random_randint():
-    a = nd.random.randint(100, 10000, shape=(LARGE_X, SMALL_Y))
-    assert a.shape == (LARGE_X, SMALL_Y)
-    # check if randint can generate value greater than 2**32 (large)
-    low_large_value = 2**32
-    high_large_value = 2**34
-    a = nd.random.randint(low_large_value, high_large_value, dtype=np.int64)
-    low = mx.nd.array([low_large_value], dtype='int64')
-    high = mx.nd.array([high_large_value], dtype='int64')
-    assert a >= low and a < high
-    assert a[-1][0].dtype == np.int64
-
-
-@with_seed()
-def test_ndarray_random_exponential():
-    scale_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.exponential(scale=scale_array, shape=(SMALL_X, SMALL_Y))
-    assert a[-1][0][0][0] >= 0
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_gamma():
-    alpha_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    beta_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.gamma(alpha=alpha_array, beta=beta_array,
-                        shape=(SMALL_X, SMALL_Y))
-    assert a[-1][0][0][0] >= 0
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_multinomial():
-    # test 1 shape dimension
-    probs = nd.random.uniform(shape=(LARGE_X, SMALL_Y))
-    a = nd.random.multinomial(probs)
-    assert a[-1] >= 0
-    assert a.shape == (LARGE_X,)
-    # test for NDArray multi-dimension shape
-    a = nd.random.multinomial(probs, shape=(2, SMALL_Y))
-    assert a[-1][0][0] >= 0
-    assert a.shape == (LARGE_X, 2, SMALL_Y)
-    # test log_likelihood output shape
-    a = nd.random.multinomial(probs, shape=(2, SMALL_Y), get_prob=True)
-    assert a[0][0][0][0] >= 0
-    assert a[0].shape == (LARGE_X, 2, SMALL_Y) and a[0].shape == a[1].shape
-
-
-@with_seed()
-def test_ndarray_random_generalized_negative_binomial():
-    alpha_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    mu_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.generalized_negative_binomial(mu=mu_array, alpha=alpha_array,
-                                                shape=(SMALL_X, SMALL_Y))
-    assert a[-1][0][0][0] >= 0
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_negative_binomial():
-    k_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    p_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.negative_binomial(k=k_array, p=p_array,
-                                    shape=(SMALL_X, SMALL_Y))
-    assert a[-1][0][0][0] >= 0
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_normal():
-    scale_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    loc_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.normal(loc=loc_array, scale=scale_array,
-                         shape=(SMALL_X, SMALL_Y))
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_poisson():
-    lambda_array = nd.random.uniform(shape=(MEDIUM_X, SMALL_X))
-    a = nd.random.poisson(lam=lambda_array, shape=(SMALL_X, SMALL_Y))
-    assert a[-1][0][0][0] >= 0
-    assert a.shape == (MEDIUM_X, SMALL_X, SMALL_X, SMALL_Y)
-
-
-@with_seed()
-def test_ndarray_random_randn():
-    a = nd.random.randn(LARGE_X, SMALL_Y)
-    assert a.shape == (LARGE_X, SMALL_Y)
-    # TODO: Once PR #15772 for randn ndarray dtype for loc,scale param merged
-    # Add check for (x,y,m,n) where x,y shape of loc,scale and m,n input shape
-
-
-@with_seed()
-def test_ndarray_random_shuffle():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    a[-1] = 3  # assign 3 to entire last row
-    a = nd.random.shuffle(a)
-    # slice first column from shuffled array
-    # pass LARGE_X values to numpy instead of LARGE_X*SMALL_Y
-    # could have assigned to last column (so as to pass SMALL_Y)
-    # but shuffle operation is performed along first axis
-    unique_a = np.unique(a[:, 0].asnumpy())
-    assert len(unique_a) == 2  # only 2 unique values
-    assert unique_a[0] == 1  # first unique value is 1
-    assert unique_a[1] == 3  # second unique value is 3
-    assert a.shape == (LARGE_X, SMALL_Y)
-
-
-def test_ndarray_empty():
-    a = nd.empty((LARGE_X, SMALL_Y))
-    assert a.shape == (LARGE_X, SMALL_Y)
-
-
-def test_elementwise():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.ones(shape=(LARGE_X, SMALL_Y))
-    res = a + b
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
-    res = a + 1
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
-    res = nd.sqrt(a + 3)
-    assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
-
-
-def test_reduce():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
-
-
-def test_dot():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.ones(shape=(SMALL_Y, SMALL_Y))
-    res = nd.dot(a, b)
-    assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
-
-
-def test_FullyConnected():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.ones(shape=(SMALL_Y, SMALL_Y))
-    c = nd.ones(shape=(b.shape[0],))
-
-    # w/o bias
-    res = nd.FullyConnected(a, b, num_hidden=b.shape[0], no_bias=True)
-    assert np.sum(res[-1].asnumpy() == a.shape[1]) == b.shape[0]
-
-    # w/ bias
-    res = nd.FullyConnected(a, b, c, num_hidden=b.shape[0], no_bias=False)
-    assert np.sum(res[-1].asnumpy() == a.shape[1] + 1) == b.shape[0]
-
-
-def test_broadcast():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
-    res = nd.broadcast_to(b, shape=(b.shape[0], SMALL_Y))
-    assert np.sum(res[-1].asnumpy() == LARGE_X) == res.shape[1]
-    res = mx.nd.broadcast_like(b, a)
-    assert np.sum(res[-1].asnumpy() == LARGE_X) == a.shape[1]
-
-
-def test_clip():
-    a = nd.arange(0, LARGE_X * SMALL_Y).reshape(LARGE_X, SMALL_Y)
-    res = nd.clip(a, a_min=100, a_max=1000)
-    assert np.sum(res[-1].asnumpy() == 1000) == a.shape[1]
-
-
-def test_split():
-    a = nd.arange(0, LARGE_X * SMALL_Y).reshape(LARGE_X, SMALL_Y)
-    outs = nd.split(a, num_outputs=SMALL_Y, axis=1)
-    result = sum(1 for i, v in enumerate(outs) if i == v[0].asnumpy())
-    assert result == a.shape[1]
-
-
-def test_argmin():
-    a = nd.arange(0, LARGE_X * SMALL_Y).reshape(LARGE_X, SMALL_Y)
-    idx = mx.nd.argmin(a, axis=0)
-    assert idx.shape[0] == SMALL_Y
-
-
-def test_tile():
-    a = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
-    b = nd.tile(a, reps=(1, SMALL_Y))
-    assert np.sum(b[-1].asnumpy() == LARGE_X) == b.shape[1]
-
-
-def test_take():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    idx = nd.arange(LARGE_X - 1000, LARGE_X)
-    res = nd.take(a, idx)
-    assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
-
-
-def test_slice():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    res = nd.slice(a, begin=(LARGE_X-1000, 1), end=(LARGE_X, SMALL_Y))
-    assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
-
-
-def test_slice_assign():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    a[LARGE_X-1:LARGE_X] = 1000
-    assert np.sum(a[-1].asnumpy() == 1000) == a.shape[1]
-
-
-def test_expand_dims():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    res = nd.expand_dims(a, axis=1)
-    res.wait_to_read()
-    assert a[0][0][0] == 1
-    assert res.shape == (a.shape[0], 1, a.shape[1])
-
-
-def test_squeeze():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    data = nd.expand_dims(a, axis=1)
-    res = nd.squeeze(data)
-    assert res.shape == a.shape
-
-
-def test_broadcast_div():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.ones(shape=(LARGE_X, 1)) * 2
-    res = a / b
-    assert np.sum(res[-1].asnumpy() == 0.5) == a.shape[1]
-
-
-def test_Dense(ctx=mx.cpu(0)):
-    data = mx.nd.ones(shape=(50*1000*1000, 100))
-    linear = gluon.nn.Dense(100)
-    linear.initialize(ctx=ctx)
-    res = linear(data)
-    assert res.shape == (50000000, 100)
-
-
-def test_where():
-    a = nd.ones(shape=(LARGE_X, SMALL_Y))
-    b = nd.arange(0, LARGE_X * SMALL_Y).reshape(LARGE_X, SMALL_Y)
-    res = nd.where(b > 100, a, b)
-    assert np.sum(res[-1].asnumpy() == 1) == b.shape[1]
-    csr_cond = nd.sparse.cast_storage(b < 10, 'csr')
-    res = nd.sparse.where(csr_cond, a, b)
-    assert np.sum(res[0].asnumpy() == 1) == 10
-
-
-def test_pick():
-    a = mx.nd.ones(shape=(256 * 35, 1024 * 1024))
-    b = mx.nd.ones(shape=(256 * 35, ))
-    res = mx.nd.pick(a, b)
-    assert res.shape == b.shape
-
-
-def test_depthtospace():
-    def numpy_depth_to_space(x, blocksize):
-        b, c, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
-        tmp = np.reshape(x, [b, blocksize, blocksize, c // (blocksize**2), h,
-                         w])
-        tmp = np.transpose(tmp, [0, 3, 4, 1, 5, 2])
-        y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize,
-                       w * blocksize])
-        return y
-
-    shape_inp = (LARGE_X, 8, 4, 2)
-    data = rand_ndarray(shape_inp, 'default')
-    data_np = data.asnumpy()
-    expected = numpy_depth_to_space(data_np, 2)
-    output = mx.nd.depth_to_space(data, 2)
-    assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3)
-
-
-def test_spacetodepth():
-    def numpy_space_to_depth(x, blocksize):
-        b, c, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
-        tmp = np.reshape(x, [b, c, h // blocksize, blocksize, w // blocksize,
-                         blocksize])
-        tmp = np.transpose(tmp, [0, 3, 5, 1, 2, 4])
-        y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize,
-                       w // blocksize])
-        return y
-
-    shape_inp = (LARGE_X, 2, 8, 4)
-    data = rand_ndarray(shape_inp, 'default')
-    data_np = data.asnumpy()
-    expected = numpy_space_to_depth(data_np, 2)
-    output = mx.nd.space_to_depth(data, 2)
-    assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3)
-
-
-@with_seed()
-def test_diag():
-    a_np = np.random.random((LARGE_X, SMALL_Y)).astype(np.float32)
-    a = mx.nd.array(a_np)
-
-    # k == 0
-    r = mx.nd.diag(a)
-    assert_almost_equal(r.asnumpy(), np.diag(a_np))
-
-    # k == 1
-    k = 1
-    r = mx.nd.diag(a, k=k)
-    assert_almost_equal(r.asnumpy(), np.diag(a_np, k=k))
-
-    # k == -1
-    k = -1
-    r = mx.nd.diag(a, k=k)
-    assert_almost_equal(r.asnumpy(), np.diag(a_np, k=k))
-
-    # random k
-    k = np.random.randint(-min(LARGE_X, SMALL_Y) + 1, min(LARGE_X, SMALL_Y))
-    r = mx.nd.diag(a, k=k)
-    assert_almost_equal(r.asnumpy(), np.diag(a_np, k=k))
-
-
-@with_seed()
-def test_ravel_multi_index():
-    x1, y1 = rand_coord_2d((LARGE_X - 100), LARGE_X, 10, SMALL_Y)
-    x2, y2 = rand_coord_2d((LARGE_X - 200), LARGE_X, 9, SMALL_Y)
-    x3, y3 = rand_coord_2d((LARGE_X - 300), LARGE_X, 8, SMALL_Y)
-    indices_2d = [[x1, x2, x3], [y1, y2, y3]]
-    idx = mx.nd.ravel_multi_index(mx.nd.array(indices_2d, dtype=np.int64),
-                                  shape=(LARGE_X, SMALL_Y))
-    idx_numpy = np.ravel_multi_index(indices_2d, (LARGE_X, SMALL_Y))
-    assert np.sum(1 for i in range(idx.size) if idx[i] == idx_numpy[i]) == 3
-
-
-@with_seed()
-def test_unravel_index():
-    x1, y1 = rand_coord_2d((LARGE_X - 100), LARGE_X, 10, SMALL_Y)
-    x2, y2 = rand_coord_2d((LARGE_X - 200), LARGE_X, 9, SMALL_Y)
-    x3, y3 = rand_coord_2d((LARGE_X - 300), LARGE_X, 8, SMALL_Y)
-    original_2d_indices = [[x1, x2, x3], [y1, y2, y3]]
-    idx_numpy = np.ravel_multi_index(original_2d_indices, (LARGE_X, SMALL_Y))
-    indices_2d = mx.nd.unravel_index(mx.nd.array(idx_numpy, dtype=np.int64),
-                                     shape=(LARGE_X, SMALL_Y))
-    assert (indices_2d.asnumpy() == np.array(original_2d_indices)).all()
-
-
-def test_transpose():
-    test_dtypes = [np.float32, np.int64]
-    for dtype in test_dtypes:
-        b = create_2d_tensor(rows=LARGE_X, columns=SMALL_Y, dtype=dtype)
-        t = b.T
+def test_nn():
+    def check_gluon_embedding():
+        m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X)
+        m.initialize()
+        a = nd.zeros((MEDIUM_X, SMALL_Y))
+        b = m(a)
+        assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X)
+        assert b.asnumpy().size == LARGE_SIZE
+
+    def check_fully_connected():
+        a = nd.ones(shape=(LARGE_X, SMALL_Y))
+        b = nd.ones(shape=(SMALL_Y, SMALL_Y))
+        c = nd.ones(shape=(b.shape[0],))
+
+        # w/o bias
+        res = nd.FullyConnected(a, b, num_hidden=b.shape[0], no_bias=True)
+        assert np.sum(res[-1].asnumpy() == a.shape[1]) == b.shape[0]
+
+        # w/ bias
+        res = nd.FullyConnected(a, b, c, num_hidden=b.shape[0], no_bias=False)
+        assert np.sum(res[-1].asnumpy() == a.shape[1] + 1) == b.shape[0]
+
+    def check_dense(ctx=mx.cpu(0)):
+        data = mx.nd.ones(shape=(50*1000*1000, 100))
+        linear = gluon.nn.Dense(100)
+        linear.initialize(ctx=ctx)
+        res = linear(data)
+        assert res.shape == (50000000, 100)
+
+    def check_softmax():
+        input_data = mx.nd.ones((SMALL_Y, LARGE_X))
+        for axis in [0, 1]:
+            true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
+            output = nd.softmax(input_data, axis=axis)
+            assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5)
+
+    def check_softmax_cross_entropy():
+        # the dtype of the input data and MXNet's cross entropy are set explicitly to float64;
+        # numpy handles double precision implicitly
+        batch_size = SMALL_Y
+        num_labels = LARGE_X
+        input_data = mx.nd.ones((batch_size, num_labels), dtype="float64")
+        input_label = mx.nd.zeros((batch_size,), dtype="float64")
+        true_softmax = np.full((batch_size, num_labels), (1 / num_labels))
+        # use 1/batch_size when softmax axis=0
+        # here 1/num_labels since softmax_cross_entropy uses default axis
+        # by default axis=1
+        np_one_hot_label = np.zeros((batch_size, num_labels))
+        np_one_hot_label[:, 0] = 1
+        true_softmax_cross_entropy = np.sum(-np.log(true_softmax) *
+                                            np_one_hot_label)
+        mx_softmax_cross_entropy = mx.nd.softmax_cross_entropy(input_data,
+                                                               input_label,
+                                                               dtype="float64")
+        assert_almost_equal(mx_softmax_cross_entropy.asnumpy(),
+                            true_softmax_cross_entropy, rtol=1e-3, atol=1e-5)
+
+    def check_softmax_output():
+        x = mx.sym.Variable('x')
+        label = mx.sym.Variable('label')
+        x_nd = mx.nd.ones((LARGE_X, SMALL_Y))
+        grad_x = mx.nd.zeros((LARGE_X, SMALL_Y))
+        label_nd = mx.nd.ones((LARGE_X))
+        sym = mx.sym.SoftmaxOutput(data=x, label=label, ignore_label=0,
+                                   use_ignore=False)
+
+        ex = sym.bind(ctx=default_context(), args={'x': x_nd, 'label': label_nd},
+                      args_grad=None)
+        ex.forward(is_train=False)
+        softmax_out = ex.outputs[0][0].asnumpy()
+        expected_softmax_out = (1 / SMALL_Y) * mx.nd.ones((SMALL_Y)).asnumpy()
+        assert np.isclose(softmax_out, expected_softmax_out).all()
+
+        ex = sym.bind(ctx=default_context(), args={'x': x_nd, 'label': label_nd},
+                      args_grad={'x': grad_x})
+        ex.forward(is_train=True)
+        softmax_out = ex.outputs[0][0].asnumpy()
+        expected_softmax_out = (1 / SMALL_Y) * mx.nd.ones((SMALL_Y)).asnumpy()
+        assert np.isclose(softmax_out, expected_softmax_out).all()
+
+        ex.backward(is_train=True)
+        grad_out = ex.grad_arrays[0][0].asnumpy()
+        k = int(label_nd[0].asscalar())
+        expected_grad_out = np.zeros((SMALL_Y,))
+        expected_grad_out[k] = -1
+        assert np.isclose(grad_out - softmax_out, expected_grad_out).all()
+
+    def np_softmax(x, axis=-1, temperature=1.0):
+        x = x - np.max(x, axis=axis, keepdims=True)
+        x = np.exp(x/temperature)
+        x /= np.sum(x, axis=axis, keepdims=True)
+        return x
+
+    def check_log_softmax():
+        ndim = 2
+        shape = (SMALL_Y, LARGE_X)
+        axis = np.random.randint(0, ndim)
+        data = np.random.uniform(-2, 2, size=shape)
+        sym = mx.sym.log_softmax(axis=axis-ndim)
+        check_symbolic_forward(sym, [data], [np.log(np_softmax(data, axis=axis)+1e-20)])
+
+    # TODO: correctness of prelu (currently flaky)
+    def check_leaky_relu():
+        a = -1*mx.nd.ones((LARGE_X, SMALL_Y))
+
+        def check_leaky():
+            res = mx.nd.LeakyReLU(a, act_type="leaky", slope=0.3)
+            assert_almost_equal(res[-1][-1].asnumpy(), 0.3*a[-1][-1].asnumpy(), atol=1e-3, rtol=1e-3)
+
+        def check_elu():
+            res = mx.nd.LeakyReLU(a, act_type="elu", slope=0.3)
+            assert_almost_equal(res[-1][-1].asnumpy(), 0.3*(np.exp(a[-1][-1].asnumpy())-1), atol=1e-3, rtol=1e-3)
+
+        def check_selu():
+            lam = 1.0507009873554804934193349852946
+            alpha = 1.6732632423543772848170429916717
+            res = mx.nd.LeakyReLU(a, act_type="selu")
+            assert_almost_equal(res[-1][-1].asnumpy(), (lam * alpha * (np.exp(a[-1][-1].asnumpy())-1)), atol=1e-3, rtol=1e-3)
+
+        def check_rrelu():
+            lower = 0.125
+            upper = 0.333999991
+            res = mx.nd.LeakyReLU(a, act_type="rrelu")
+            assert_almost_equal(res[0][-1][-1].asnumpy(), (lower + upper) / 2 * a[-1][-1].asnumpy(), atol=1e-3, rtol=1e-3)
+
+        check_leaky()
+        check_elu()
+        check_selu()
+        check_rrelu()
+
+    def check_pooling():
+        a = mx.nd.ones((MEDIUM_X, 200, SMALL_Y, SMALL_Y))
+
+        def check_avg_pooling():
+            res = mx.nd.Pooling(a, kernel=(5, 5), pool_type='avg')
+            assert_almost_equal(res[-1][-1][-1][-1].asnumpy(), 1.0000001, atol=1e-3, rtol=1e-3)
+            assert res.shape[-1] == SMALL_Y - 5 + 1
+
+        def check_max_pooling():
+            res = mx.nd.Pooling(a, kernel=(5, 5), pool_type='max')
+            assert_almost_equal(res[-1][-1][-1][-1].asnumpy(), 1., atol=1e-3, rtol=1e-3)
+            assert res.shape[-1] == SMALL_Y - 5 + 1
+
+        def check_sum_pooling():
+            res = mx.nd.Pooling(a, kernel=(5, 5), pool_type='sum')
+            assert_almost_equal(res[-1][-1][-1][-1].asnumpy(), 25, atol=1e-3, rtol=1e-3)
+            assert res.shape[-1] == SMALL_Y - 5 + 1
+
+        def check_lp_pooling():
+            res = mx.nd.Pooling(a, kernel=(5, 5), pool_type='lp', p_value=2)
+            assert_almost_equal(res[-1][-1][-1][-1].asnumpy(), 5., atol=1e-3, rtol=1e-3)
+            assert res.shape[-1] == SMALL_Y - 5 + 1
+
+            res = mx.nd.Pooling(a, kernel=(5, 5), pool_type='lp', p_value=1)
+            assert_almost_equal(res[-1][-1][-1][-1].asnumpy(), 25., atol=1e-3, rtol=1e-3)
+            assert res.shape[-1] == SMALL_Y - 5 + 1
+
+        check_avg_pooling()
+        check_max_pooling()
+        check_sum_pooling()
+        check_lp_pooling()
+
+    def check_layer_norm():
+        dtype = np.float32
+        forward_check_eps = 1E-3
+        axis = 1
+        eps = 1E-5
+        in_shape = (LARGE_X, SMALL_Y)
+        ctx = mx.cpu()
+
+        def npy_layer_norm(data, gamma, beta, axis=1, eps=1E-5):
+            if axis < 0:
+                axis += data.ndim
+            broadcast_shape = [1 for _ in range(data.ndim)]
+            broadcast_shape[axis] = data.shape[axis]
+            mean = data.mean(axis=axis, keepdims=True).astype(dtype)
+            var = data.var(axis=axis, keepdims=True).astype(dtype)
+            std = np.sqrt(var + dtype(eps)).astype(dtype)
+            out = np.reshape(gamma, broadcast_shape) * (data - mean) / std + \
+                  np.reshape(beta, broadcast_shape)
+            return out
+        data = np.random.normal(0, 1, in_shape).astype(dtype)
+        gamma = np.random.normal(0, 1, (in_shape[axis],)).astype(dtype)
+        beta = np.random.normal(0, 1, (in_shape[axis],)).astype(dtype)
+        data_s = mx.symbol.Variable('data')
+        gamma_s = mx.symbol.Variable('gamma')
+        beta_s = mx.symbol.Variable('beta')
+        out_s = mx.symbol.LayerNorm(data=data_s, gamma=gamma_s, beta=beta_s,
+                                    axis=axis, eps=eps)
+        exe = out_s.simple_bind(ctx, data=in_shape)
+        exe.arg_dict['data'][:] = data
+        exe.arg_dict['gamma'][:] = gamma
+        exe.arg_dict['beta'][:] = beta
+        out_nd = exe.forward()[0]
+        out = npy_layer_norm(data, gamma, beta, axis, eps)
+        assert_almost_equal(out, out_nd.asnumpy(), forward_check_eps,
+                            forward_check_eps)
+
+    # TODO: check correctness of dropout
+    # currently we only test that dropout runs,
+    # since checking correctness hits flakiness issue #14288
+    def check_dropout():
+        shape = (LARGE_X, SMALL_Y)
+        x = mx.sym.var('data')
+        y = mx.sym.Dropout(x, p=1, cudnn_off=True)
+        exe = y.simple_bind(ctx=default_context(), data=shape)
+        exe.arg_arrays[0][:] = 1
+        out = exe.forward(is_train=True)
+        nd.waitall()
+        assert out[0].shape == shape
+
+    def check_activation():
+        x = mx.nd.ones((LARGE_X, SMALL_Y))
+        check_x = -2
+        x[-1, -1] = check_x
+        # Hyperbolic tangent (tanh)
+        # y = (exp(x)-exp(-x))/(exp(x)+exp(-x))
+        y = mx.nd.Activation(x, act_type="tanh")
+        tanh_x = ((np.exp(check_x)-np.exp(-check_x))/(np.exp(check_x)+np.exp(-check_x)))
+        assert y[-1][-1] == np.float32(tanh_x)
+        # Rectified Linear Unit (relu)
+        # y = max(x,0)
+        y = mx.nd.Activation(x, act_type="relu")
+        assert y[-1][-1] == 0
+        # Sigmoid
+        # y = 1/(1+exp(-x))
+        y = mx.nd.Activation(x, act_type="sigmoid")
+        sigmoid_x = (1/(1+math.exp(-check_x)))
+        assert_almost_equal(y[-1][-1].asnumpy(), np.float32(sigmoid_x), atol=1e-3, rtol=1e-3)
+        # Soft Sign
+        # y = x/(1+abs(x))
+        y = mx.nd.Activation(x, act_type="softsign")
+        softsign_x = (check_x/(1+abs(check_x)))
+        assert y[-1][-1] == np.float32(softsign_x)
+
+
+    # TODO: correctness of batchnorm
+    # in future, we could test if mean, var of output
+    # matches target output's mean, var
+    def check_batchnorm():
+        def get_np_mean_var(data, running_mean, running_var, eps, use_global_status=True):
+            if not use_global_status:
+                # train mode, calculate the real mean and var
+                mean = np.mean(data, axis=(0, 2, 3))
+                mean_broad = np.expand_dims(mean, axis=0)
+                mean_broad = np.expand_dims(mean_broad, axis=2)
+                mean_broad = np.expand_dims(mean_broad, axis=3)
+                mean_broad = np.broadcast_to(mean_broad, data.shape)
+                var = np.square(data - mean_broad)
+                var = np.mean(var, axis=(0, 2, 3))
+            else:
+                # inference mode, use running_mean and running_var instead
+                mean = np.full((data.shape[1],), running_mean)
+                var = np.full((data.shape[1],), running_var)
+            # calculate the inverse of standard variance
+            invstdvar = 1. / np.sqrt(var + eps)
+            return mean, invstdvar
+        # Here use 4D input to cover mkldnn BN and non-mkldnn BN
+        shape = (1, 2, LARGE_X, SMALL_Y)
+        axis = 1  # default
+        eps = 1e-3
+        nch = shape[axis]
+        data = mx.nd.ones(shape=shape)
+        bn_gamma = mx.nd.random.uniform(shape=(nch,))
+        bn_beta = mx.nd.random.uniform(shape=(nch,))
+        bn_running_mean = mx.nd.zeros(nch)
+        bn_running_var = mx.nd.ones(nch)
+        output = mx.nd.BatchNorm(data, bn_gamma, bn_beta,
+                                 bn_running_mean, bn_running_var, output_mean_var=True)
+        assert output[0].shape == shape
+        mean, invstdvar = output[1], output[2]
+        np_mean, np_invstdvar = get_np_mean_var(data.asnumpy(), bn_running_mean.asnumpy(), bn_running_var.asnumpy(),
+                                                eps, use_global_status=True)
+        assert_almost_equal(mean.asnumpy(), np_mean)
+        assert_almost_equal(invstdvar.asnumpy(), np_invstdvar)
+
+    def check_relu():
+        def frelu(x):
+            return np.maximum(x, 0.0)
+
+        def frelu_grad(x):
+            return 1.0 * (x > 0.0)
+        shape = (SMALL_Y, LARGE_X)
+        x = mx.symbol.Variable("x")
+        y = mx.sym.relu(x)
+        xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
+        eps = 1e-4
+        xa[abs(xa) < eps] = 1.0
+        ya = frelu(xa)
+        ga = frelu_grad(xa)
+        check_symbolic_forward(y, [xa], [ya])
+
+    def check_sigmoid():
+        def fsigmoid(a):
+            return np.divide(1.0, (1.0 + np.exp(-a)))
+        shape = (SMALL_Y, LARGE_X)
+        x = mx.symbol.Variable("x")
+        y = mx.sym.sigmoid(x)
+        xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
+        ya = fsigmoid(xa)
+        check_symbolic_forward(y, [xa], [ya])
+
+    def check_linear_and_logistic_regression():
+        shape = (LARGE_X, SMALL_Y)
+
+        def check_regression(symbol, forward, backward, shape):
+            # init executor
+            data_s = mx.symbol.Variable('data')
+            label_s = mx.symbol.Variable('label')
+            out_s = symbol(data=data_s, label=label_s)
+            grad_req = {'data': 'write', 'label': 'null'}
+            exe = out_s.simple_bind(ctx=default_context(), data=shape, label=shape, grad_req=grad_req)
+            arg_map = dict(zip(out_s.list_arguments(), exe.arg_arrays))
+            grad_map = dict(zip(out_s.list_arguments(), exe.grad_arrays))
+            # init data
+            data = mx.random.uniform(-1, -1, shape)
+            arg_map["data"][:] = data
+            atol = 1e-5
+            density = 0.5
+            stype = 'default'
+            label = arg_map["label"]
+            label[:] = rand_ndarray(shape, stype, density=density)
+            exe.forward(is_train=True)
+            exe.backward()
+            np_out = forward(data.asnumpy())
+            out_grad = backward(np_out, label.asnumpy().reshape(np_out.shape)) / shape[1]
+            assert_almost_equal(exe.outputs[0].asnumpy(), np_out, atol=atol)
+            assert_almost_equal(grad_map["data"].asnumpy(), out_grad, atol=atol)
+
+        check_regression(mx.symbol.LogisticRegressionOutput,
+                         lambda x: 1.0 / (1.0 + np.exp(-x)),
+                         lambda x, y: x - y,
+                         shape)
+        check_regression(mx.symbol.LinearRegressionOutput,
+                         lambda x: x,
+                         lambda x, y: x - y,
+                         shape)
+
+    def check_l2_normalization():
+        x = nd.ones((2, LARGE_X*2))
+        x[0] = 3
+        x[1] = 4
+        # Channel Mode
+        z = x.reshape(1, 2, LARGE_X*2)
+        y = nd.L2Normalization(z, mode='channel')
+        assert y[0][0][0] == 0.6
+        assert y[0][0][-1] == 0.6
+        assert y[0][1][0] == 0.8
+        assert y[0][1][-1] == 0.8
+        # Instance Mode
+        z = x.T
+        y = nd.L2Normalization(z, mode='instance')
+        assert y[0][0] == 0.6
+        assert y[0][1] == 0.8
+        assert y[-1][0] == 0.6
+        assert y[-1][1] == 0.8
+        # Spatial Mode
+        z = z.reshape(1, 200000000, 2)
+        y = nd.L2Normalization(z, mode='spatial')
+        assert y[0][0][0] == 0.6
+        assert y[0][0][1] == 0.8
+        assert y[0][-1][0] == 0.6
+        assert y[0][-1][1] == 0.8
+
+    def check_instance_norm():
+        dtype = np.float32
+        forward_check_eps = 1E-3
+        axis = -1
+        eps = 1E-5
+        in_shape = (LARGE_X, 1, SMALL_Y)
+        ctx = mx.cpu()
+
+        # Implementation of instance normalization using numpy
+        def npy_instance_norm(data, gamma, beta, axis, eps=1E-5):
+            if axis < 0:
+                axis += data.ndim
+            broadcast_shape = [1 for _ in range(data.ndim)]
+            broadcast_shape[axis] = data.shape[axis]
+            mean = data.mean(axis=axis, keepdims=True).astype(dtype)
+            var = data.var(axis=axis, keepdims=True).astype(dtype)
+            std = np.sqrt(var + dtype(eps)).astype(dtype)
+            out = gamma * (data - mean) / std + \
+                  beta
+            return out
+        data = np.random.normal(0, 1, in_shape).astype(dtype)
+        gamma = np.random.normal(0, 1, (1,)).astype(dtype)
+        beta = np.random.normal(0, 1, (1,)).astype(dtype)
+        data_s = mx.symbol.Variable('data')
+        gamma_s = mx.symbol.Variable('gamma')
+        beta_s = mx.symbol.Variable('beta')
+        out_s = mx.symbol.InstanceNorm(data=data_s, gamma=gamma_s, beta=beta_s,
+                                       eps=eps)
+        exe = out_s.simple_bind(ctx, data=in_shape)
+        exe.arg_dict['data'][:] = data
+        exe.arg_dict['gamma'][:] = gamma
+        exe.arg_dict['beta'][:] = beta
+        out_nd = exe.forward()[0]
+        # Calls implementation of instance norm in numpy and compares the output
+        out = npy_instance_norm(data, gamma, beta, axis, eps)
+        assert_almost_equal(out, out_nd.asnumpy(), forward_check_eps,
+                            forward_check_eps)
+
+    check_gluon_embedding()
+    check_fully_connected()
+    check_dense(ctx=mx.cpu(0))
 
 Review comment:
  My bad, that was a copy-paste error. https://github.com/apache/incubator-mxnet/pull/17305/files#diff-9ee911616af04047075035c95cf542fbR60 already has the default parameter set to the CPU ctx.
   
   We are not testing on GPU since a tensor with more than 2^32 elements of a 4-byte dtype already needs 16 GB of memory, which exceeds the global memory of most GPUs:
   
   ```
   (2^32*4)/(2^30) = 16 GB
   ``` 
   The only exception would be a V100 with 32 GB of global memory.
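   
   For illustration, a minimal sketch of that back-of-the-envelope calculation (the variable names below are mine, not from the PR or the test suite):
   
   ```python
   # Illustrative only: the smallest tensor that needs int64 indexing, at 4 bytes per element.
   elements = 2 ** 32           # first element count that overflows a 32-bit index
   bytes_per_element = 4        # float32
   print(elements * bytes_per_element / 2 ** 30)  # 16.0 GiB
   ```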
