You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2020/07/19 21:13:50 UTC
[incubator-mxnet] branch master updated: Unittest tolerance handling improvements (#18694)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 146b49e Unittest tolerance handling improvements (#18694)
146b49e is described below
commit 146b49ead32b941f74db694f2d453cb25650d252
Author: Dick Carter <dc...@nvidia.com>
AuthorDate: Sun Jul 19 14:12:50 2020 -0700
Unittest tolerance handling improvements (#18694)
* Add sm arch 80 to Makefile
* Add TF32 to cuBLAS GEMMs
Signed-off-by: Serge Panev <sp...@nvidia.com>
* Add CUDA version guards
Signed-off-by: Serge Panev <sp...@nvidia.com>
* Remove useless TF32 for double and old CUDA version
Signed-off-by: Serge Panev <sp...@nvidia.com>
* Factorize VERSION_ADJUSTED_TF32_MATH
Signed-off-by: Serge Panev <sp...@nvidia.com>
* Add TF32 considerations to test_util.py:check_consistency()
* Bypass test_gluon_gpu.py:test_large_models if gmem >32GB
* Default tols in assert_almost_equal() now a function of dtype and ctx
* Expand types listed by default_tols()
* Fix pylint
* All with_seed() tests to waitall in teardown
* Elevate MXNET_TEST_SEED logging to WARNING
* Revert test_gluon_gpu.py:test_rnn_layer to default tols
* Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpu.py::test_np_linalg_{solve,tensorinv}
* test_numpy_interoperability.py to not fix seed for rest of CI
* Further fix to test_np_linalg_tensorinv
* Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system.
* Fix test_operator_gpu.py::test_embedding_with_type
* Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve}
* Remove unneeded print() from test_numpy_interoperability.py
* Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks.
* Add tol handling of assert_almost_equal() with number args
* Add tol handling of bool comparisons
* Fix test_numpy_op.py::test_np_random_rayleigh
* Fix test_operator_gpu.py::test_batchnorm_with_type
* Fix test_gluon.py::test_sync_batchnorm in cpu selftest
* Improve unittest failure reporting
* Add to robustness of test_operator_gpu.py::test_embedding_with_type
* Check_consistency() to use equal backward gradients for increased test robustness
* Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps().
* test_utils.py fix for numeric gradient calc
* Reinstate rtol=1e-2 for test_operator.py::test_order
* Remove auto-cast of check_consistency() input data to least precise dtype (not needed)
* Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op
* Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin
* Fix segfault-on-error of @retry decorator. Add test isolation.
* assert_almost_equal() to handle a,b scalars
* Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race
* Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale
* Remove test_utils.py:almost_equal_ignore_nan()
* Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm
* Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd
* Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype
* Try test_elemwise_binary_ops serial to avoid pytest worker crash
* Fix (log_)softmax backward on empty ndarray
* Temporarily log all CI seeds to troubleshoot seed non-determinism
* Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism"
This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47.
* Temp log all CI seeds to troubleshoot unwanted seed determinism
* Revert "Add sm arch 80 to Makefile"
This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9.
* Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm
* Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism"
This reverts commit ff328efb0be3445690669d5437a6af575ff12b49.
* Marking test_sparse_dot_grad with garbage_expected after teardown error
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,}
* Temp skip of test_aggregate_duplication on gpu
* Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique.
* Add ndarray module isolation to help debug test_bbox_augmenters worker crash
* Marking test_sparse_square_sum serial after pytest worker crash
* Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,}
Co-authored-by: Serge Panev <sp...@nvidia.com>
Co-authored-by: Bart Gawrych <ga...@intel.com>
---
python/mxnet/test_utils.py | 377 +++++++++++++--------
src/operator/linalg.h | 8 +
src/operator/linalg_impl.h | 34 +-
src/operator/nn/log_softmax.cc | 1 +
src/operator/nn/softmax.cc | 1 +
src/operator/numpy/np_true_divide-inl.h | 14 +-
tests/python/gpu/test_gluon_gpu.py | 16 +-
tests/python/gpu/test_gluon_model_zoo_gpu.py | 2 +-
tests/python/gpu/test_operator_gpu.py | 95 +++---
tests/python/gpu/test_profiler_gpu.py | 2 +
tests/python/unittest/common.py | 22 +-
tests/python/unittest/test_autograd.py | 1 +
.../unittest/test_contrib_gluon_data_vision.py | 5 +-
tests/python/unittest/test_gluon.py | 4 +-
tests/python/unittest/test_gluon_probability_v1.py | 20 +-
tests/python/unittest/test_gluon_probability_v2.py | 21 +-
tests/python/unittest/test_ndarray.py | 2 +-
.../test_numpy_contrib_gluon_data_vision.py | 9 +-
.../python/unittest/test_numpy_interoperability.py | 7 +-
tests/python/unittest/test_numpy_op.py | 46 ++-
tests/python/unittest/test_operator.py | 126 ++++---
tests/python/unittest/test_sparse_operator.py | 1 +
22 files changed, 487 insertions(+), 327 deletions(-)
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index dd02783..9ec0c6c 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -70,19 +70,110 @@ def default_dtype():
# _TODO: get default dtype from environment variable
return np.float32
+def default_rtols():
+ """Get default relative tolerances for data comparisons involving each data type."""
+ return {np.dtype(np.float16): 1e-2,
+ np.dtype(np.float32): 1e-4,
+ np.dtype(np.float64): 1e-5,
+ np.dtype(np.bool): 0,
+ np.dtype(np.int8): 0,
+ np.dtype(np.uint8): 0,
+ np.dtype(np.int32): 0,
+ np.dtype(np.uint32): 0,
+ np.dtype(np.int64): 0,
+ np.dtype(np.uint64): 0}
+
+def default_atols():
+ """Get default absolute tolerances for data comparisons involving each data type."""
+ return {np.dtype(np.float16): 1e-1,
+ np.dtype(np.float32): 1e-3,
+ np.dtype(np.float64): 1e-20,
+ np.dtype(np.bool): 0,
+ np.dtype(np.int8): 0,
+ np.dtype(np.uint8): 0,
+ np.dtype(np.int32): 0,
+ np.dtype(np.uint32): 0,
+ np.dtype(np.int64): 0,
+ np.dtype(np.uint64): 0}
+
+def default_numeric_eps():
+ """Get default epsilon for finite difference gradient calculations with data type."""
+ # prefer a power-of-two eps, since no bits are dropped when serving as an input delta
+ return {np.dtype(np.float16): 1.0 / 2**6,
+ np.dtype(np.float32): 1.0 / 2**9,
+ np.dtype(np.float64): 1.0 / 2**14}
+
+
+def effective_dtype(dat):
+ """ Return the most appropriate dtype for determining the tolerance used in dat comparisons
+ Parameters
+ ----------
+ dat : np.ndarray or mx.nd.array or mx.np.ndarray
+ """
+ # On arch 80 gpus, a float32-io gemm or conv op will trim the mantissa of data
+ # inputs to be of comparable precision to a float16, so float16 becomes the
+ # 'effective dtype' for tolerance tests involving such op outputs.
+
+ # Is TF32 enabled in the ctx (the default on arch 80 GPUs)
+ def is_TF32_enabled(ctx):
+ try:
+ return (ctx.device_type == 'gpu' and
+ get_cuda_compute_capability(ctx) == 80 and
+ os.environ.get('NVIDIA_TF32_OVERRIDE') != '0')
+ except: # pylint: disable=bare-except
+ return False
+
+ ctx = dat.ctx if hasattr(dat, 'ctx') else None
+ dtype = np.dtype(dat.dtype)
+ if dtype == np.dtype(np.float32) and is_TF32_enabled(ctx):
+ return np.dtype(np.float16)
+ else:
+ return dtype
-def get_atol(atol=None):
- """Get default numerical threshold for regression test."""
- # _TODO: get from env variable, different threshold might
- # be needed for different device and dtype
- return 1e-20 if atol is None else atol
+
+def get_tolerance(dat, tol, default_tol):
+ """ Return the tolerance to be used for dat comparisons based on the given tol, datatype and context.
+ Parameters
+ ----------
+ dat : np.ndarray or mx.nd.array or mx.np.ndarray
+ tol : float, or a dict of dtype->float
+ default_tol : default dict of dtype->float for all types
+ """
+
+ if isinstance(tol, numbers.Number):
+ return tol
+
+ # If the caller has supplied a tol dict, use that if it has an entry for dtype,
+ # else use the supplied default tol dict.
+ dtype = effective_dtype(dat)
+ tol = {} if tol is None else tol
+ return tol.get(dtype, default_tol[dtype])
-def get_rtol(rtol=None):
+def get_tols(x, y, rtol, atol):
+ """For comparing two datasets 'x' and 'y', what relative and absolute tolerances should be used."""
+ # Tolerance analysis needs 'dtype' of 'x' and 'y', so convert numbers to numpy scalars as needed
+ if isinstance(x, numbers.Number):
+ x = np.array(x)
+ if isinstance(y, numbers.Number):
+ y = np.array(y)
+
+ # If tols are not specified, use the largest default tol for 'x' and 'y' based on their ctx and dtype.
+ rtol = max(get_tolerance(x, rtol, default_rtols()),
+ get_tolerance(y, rtol, default_rtols()))
+ atol = max(get_tolerance(x, atol, default_atols()),
+ get_tolerance(y, atol, default_atols()))
+
+ return rtol, atol
+
+
+def get_atol(atol=None, dtype=np.dtype(np.float64)):
"""Get default numerical threshold for regression test."""
- # _TODO: get from env variable, different threshold might
- # be needed for different device and dtype
- return 1e-5 if rtol is None else rtol
+ return default_atols()[dtype] if atol is None else atol
+
+def get_rtol(rtol=None, dtype=np.dtype(np.float64)):
+ """Get default numerical threshold for regression test."""
+ return default_rtols()[dtype] if rtol is None else rtol
def get_etol(etol=None):
"""Get default numerical threshold for regression test."""
@@ -499,10 +590,8 @@ def np_reduce(dat, axis, keepdims, numpy_reduce_func):
return ret
-def find_max_violation(a, b, rtol=None, atol=None):
+def _find_max_violation(a, b, rtol, atol):
"""Finds and returns the location of maximum violation."""
- rtol = get_rtol(rtol)
- atol = get_atol(atol)
# 'smart' absdiff that considers inf's as equals (to match np.allclose)
absdiff = np.where(np.equal(a, b), 0, np.abs(a-b))
tol = atol + rtol*np.abs(b)
@@ -565,9 +654,9 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=
----------
a : np.ndarray or mx.nd.array
b : np.ndarray or mx.nd.array
- rtol : None or float
+ rtol : None or float or dict of dtype -> float
The relative threshold. Default threshold will be used if set to ``None``.
- atol : None or float
+ atol : None or float or dict of dtype -> float
The absolute threshold. Default threshold will be used if set to ``None``.
names : tuple of names, optional
The names used in error message when an exception occurs
@@ -579,8 +668,12 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=
if not use_broadcast:
checkShapes(a, b)
- rtol = get_rtol(rtol)
- atol = get_atol(atol)
+ rtol, atol = get_tols(a, b, rtol, atol)
+
+ if isinstance(a, mx.numpy.ndarray):
+ a = a.asnumpy()
+ if isinstance(b, mx.numpy.ndarray):
+ b = b.asnumpy()
use_np_allclose = isinstance(a, np.ndarray) and isinstance(b, np.ndarray)
if not use_np_allclose:
if not (hasattr(a, 'ctx') and hasattr(b, 'ctx') and a.ctx == b.ctx and a.dtype == b.dtype):
@@ -604,32 +697,37 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan=
a = a.asnumpy()
b = b.asnumpy()
- index, rel = find_max_violation(a, b, rtol, atol)
- indexErr = index
- relErr = rel
-
- print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol))
- aTmp = a.copy()
- bTmp = b.copy()
- i = 1
- while i <= a.size:
- if i <= mismatches[0]:
- print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names)))
+ index, rel = _find_max_violation(a, b, rtol, atol)
+ if index != ():
+ # a, b are the numpy arrays
+ indexErr = index
+ relErr = rel
+
+ print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol))
+ aTmp = a.copy()
+ bTmp = b.copy()
+ i = 1
+ while i <= a.size:
+ if i <= mismatches[0]:
+ print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names)))
+
+ aTmp[index] = bTmp[index] = 0
+ if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan):
+ break
- aTmp[index] = bTmp[index] = 0
- if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan):
- break
+ i += 1
+ if i <= mismatches[1] or mismatches[1] <= 0:
+ index, rel = _find_max_violation(aTmp, bTmp, rtol, atol)
+ else:
+ break
- i += 1
- if i <= mismatches[1] or mismatches[1] <= 0:
- index, rel = find_max_violation(aTmp, bTmp, rtol, atol)
- else:
- break
+ mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else ""
+ errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \
+ (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \
+ locationError(a, b, indexErr, names, maxError=True))
+ else:
+ errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e.\n" % (rel, rtol, atol)
- mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else ""
- errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \
- (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \
- locationError(a, b, indexErr, names, maxError=True))
np.set_printoptions(threshold=4, suppress=True)
msg = npt.build_err_msg([a, b], err_msg=errMsg)
@@ -648,16 +746,25 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
----------
a : np.ndarray
b : np.ndarray
+ rtol : None or float or dict of dtype -> float
+ The relative threshold. Default threshold will be used if set to ``None``.
+ atol : None or float or dict of dtype -> float
+ The absolute threshold. Default threshold will be used if set to ``None``.
threshold : None or float
The checking threshold. Default threshold will be used if set to ``None``.
etol : None or float
The error rate threshold. If etol is float, return true if error_rate < etol even if
any error is found.
+ names : tuple of names, optional
+ The names used in error message when an exception occurs
+ equal_nan : boolean, optional
+ The flag determining how to treat NAN values in comparison
+ mismatches : tuple of mismatches
+ Maximum number of mismatches to be printed (mismatches[0]) and determine (mismatches[1])
"""
etol = get_etol(etol)
if etol > 0:
- rtol = get_rtol(rtol)
- atol = get_atol(atol)
+ rtol, atol = get_tols(a, b, rtol, atol)
if isinstance(a, mx.nd.NDArray):
a = a.asnumpy()
if isinstance(b, mx.nd.NDArray):
@@ -665,7 +772,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
equals = np.isclose(a, b, rtol=rtol, atol=atol)
err = 1 - np.count_nonzero(equals) / equals.size
if err > etol:
- index, rel = find_max_violation(a, b, rtol, atol)
+ index, rel = _find_max_violation(a, b, rtol, atol)
indexErr = index
relErr = rel
@@ -683,7 +790,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
i += 1
if i <= mismatches[1] or mismatches[1] <= 0:
- index, rel = find_max_violation(aTmp, bTmp, rtol, atol)
+ index, rel = _find_max_violation(aTmp, bTmp, rtol, atol)
else:
break
@@ -698,31 +805,6 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None,
assert_almost_equal(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan)
-def almost_equal_ignore_nan(a, b, rtol=None, atol=None):
- """Test that two NumPy arrays are almost equal (ignoring NaN in either array).
- Combines a relative and absolute measure of approximate eqality.
- If either the relative or absolute check passes, the arrays are considered equal.
- Including an absolute check resolves issues with the relative check where all
- array values are close to zero.
-
- Parameters
- ----------
- a : np.ndarray
- b : np.ndarray
- rtol : None or float
- The relative threshold. Default threshold will be used if set to ``None``.
- atol : None or float
- The absolute threshold. Default threshold will be used if set to ``None``.
- """
- a = np.copy(a)
- b = np.copy(b)
- nan_mask = np.logical_or(np.isnan(a), np.isnan(b))
- a[nan_mask] = 0
- b[nan_mask] = 0
-
- return almost_equal(a, b, rtol, atol)
-
-
def assert_almost_equal_ignore_nan(a, b, rtol=None, atol=None, names=('a', 'b')):
"""Test that two NumPy arrays are almost equal (ignoring NaN in either array).
Combines a relative and absolute measure of approximate eqality.
@@ -954,7 +1036,7 @@ def numeric_grad(executor, location, aux_states=None, eps=1e-4,
return approx_grads
-def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rtol=1e-2,
+def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=None, rtol=None,
atol=None, grad_nodes=None, use_forward_train=True, ctx=None,
grad_stype_dict=None, dtype=default_dtype()):
"""Verify an operation by checking backward pass via finite difference method.
@@ -979,8 +1061,10 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
The auxiliary states required when generating the executor for the symbol.
numeric_eps : float, optional
Delta for the finite difference method that approximates the gradient.
- check_eps : float, optional
- relative error eps used when comparing numeric grad to symbolic grad.
+ rtol : None or float
+ The relative threshold. Default threshold will be used if set to ``None``.
+ atol : None or float
+ The absolute threshold. Default threshold will be used if set to ``None``.
grad_nodes : None or list or tuple or dict, optional
Names of the nodes to check gradient on
use_forward_train : bool
@@ -997,9 +1081,6 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
[1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
"""
assert dtype in (np.float16, np.float32, np.float64)
- # cannot use finite differences with small eps without high precision
- if dtype in (np.float32, np.float16):
- assert numeric_eps >= 1e-5
if ctx is None:
ctx = default_context()
@@ -1074,18 +1155,18 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
executor.forward(is_train=True)
assert len(executor.outputs) == 1
+
+ eps = get_tolerance(executor.outputs[0], numeric_eps, default_numeric_eps())
+ # cannot use finite differences with small eps without high precision
+ if dtype in (np.float32, np.float16):
+ assert eps >= 1e-5
+
executor.backward()
- symbolic_grads = {}
- for k in grad_nodes:
- grad_k = executor.grad_dict[k]
- if grad_k is not None:
- symbolic_grads[k] = grad_k.asnumpy()
- else:
- symbolic_grads[k] = None
+ symbolic_grads = executor.grad_dict
numeric_gradients = numeric_grad(
executor, location_npy, aux_states_npy,
- eps=numeric_eps, use_forward_train=use_forward_train, dtype=dtype)
+ eps=eps, use_forward_train=use_forward_train, dtype=dtype)
for name in grad_nodes:
fd_grad = numeric_gradients[name]
@@ -1095,6 +1176,8 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
assert_almost_equal(fd_grad, sym_grad, rtol, atol,
("NUMERICAL_%s"%name, "BACKWARD_%s"%name))
elif grad_req[name] == 'add':
+ if isinstance(sym_grad, mx.nd.NDArray):
+ sym_grad = sym_grad.asnumpy()
assert_almost_equal(fd_grad, sym_grad - orig_grad, rtol, atol,
("NUMERICAL_%s"%name, "BACKWARD_%s"%name))
elif grad_req[name] == 'null':
@@ -1103,7 +1186,7 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
raise ValueError("Invalid grad_req %s for argument %s"%(grad_req[name], name))
-def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None,
+def check_symbolic_forward(sym, location, expected, rtol=None, atol=None,
aux_states=None, ctx=None, equal_nan=False,
dtype=default_dtype()):
"""Compares a symbol's forward results with the expected ones.
@@ -1127,8 +1210,10 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None,
Contains arrays corresponding to exe.outputs.
- if type is dict of str to np.ndarray
Contains mapping between sym.list_output() and exe.outputs.
- check_eps : float, optional
- Relative error to check to.
+ rtol : None or float
+ The relative threshold. Default threshold will be used if set to ``None``.
+ atol : None or float
+ The absolute threshold. Default threshold will be used if set to ``None``.
aux_states : list of np.ndarray of dict, optional
- if type is list of np.ndarray
Contains all the NumPy arrays corresponding to sym.list_auxiliary_states
@@ -1177,14 +1262,14 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None,
executor.forward(is_train=False)
- outputs = [x.asnumpy() for x in executor.outputs]
+ outputs = executor.outputs
for output_name, expect, output in zip(sym.list_outputs(), expected, outputs):
assert_almost_equal(expect, output, rtol, atol,
("EXPECTED_%s"%output_name, "FORWARD_%s"%output_name),
equal_nan=equal_nan)
return executor.outputs
-def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=None,
+def check_symbolic_backward(sym, location, out_grads, expected, rtol=None, atol=None,
aux_states=None, grad_req='write', ctx=None, grad_stypes=None,
equal_nan=False, dtype=default_dtype()):
"""Compares a symbol's backward results with the expected ones.
@@ -1215,8 +1300,10 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=
Contains arrays corresponding to exe.grad_arrays
- if type is dict of str to np.ndarray
Contains mapping between ``sym.list_arguments()`` and exe.outputs.
- check_eps: float, optional
- Relative error to check to.
+ rtol : None or float
+ The relative threshold. Default threshold will be used if set to ``None``.
+ atol : None or float
+ The absolute threshold. Default threshold will be used if set to ``None``.
aux_states : list of np.ndarray or dict of str to np.ndarray
grad_req : str or list of str or dict of str to str, optional
Gradient requirements. 'write', 'add' or 'null'.
@@ -1302,7 +1389,7 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=
assert out_grads is None
executor.backward(out_grads)
- grads = {k: v.asnumpy() for k, v in args_grad_data.items()}
+ grads = args_grad_data
for name in expected:
if grad_req[name] == 'write':
@@ -1310,7 +1397,8 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=
("EXPECTED_%s"%name, "BACKWARD_%s"%name),
equal_nan=equal_nan)
elif grad_req[name] == 'add':
- assert_almost_equal(expected[name], grads[name] - args_grad_npy[name],
+ grad = grads[name].asnumpy() if isinstance(grads[name], mx.nd.NDArray) else grads[name]
+ assert_almost_equal(expected[name], grad - args_grad_npy[name],
rtol, atol, ("EXPECTED_%s"%name, "BACKWARD_%s"%name),
equal_nan=equal_nan)
elif grad_req[name] == 'null':
@@ -1395,16 +1483,8 @@ def check_speed(sym, location=None, ctx=None, N=20, grad_req=None, typ="whole",
raise ValueError('typ can only be "whole" or "forward".')
-def get_tolerance(rtol, ctx):
- if 'atol' in ctx:
- return ctx['atol']
- if 'atol_mult' in ctx:
- return ctx['atol_mult'] * rtol
- return rtol
-
-
def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
- arg_params=None, aux_params=None, tol=None,
+ arg_params=None, aux_params=None, rtol=None, atol=None,
raise_on_err=True, ground_truth=None, equal_nan=False,
use_uniform=False, rand_type=np.float64):
"""Check symbol gives the same output for different running context
@@ -1419,6 +1499,20 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
Standard deviation of the inner normal distribution. Used in initialization.
grad_req : str or list of str or dict of str to str
Gradient requirement.
+ arg_params : dict of input name -> input data
+ data to use for non-aux inputs
+ aux_params : dict of input name -> input data
+ data to use for aux inputs
+ rtol : float or dictionary dtype->float, optional
+ The relative error tolerance.
+ atol : float or dictionary dtype->float, optional
+ The absolute error tolerance.
+ raise_on_err : bool, optional, defaults to True
+ Should an error raise an exception (or just output exception message)
+ ground_truth : dict of output name -> data, optional
+ Provided ideal result to be compared against
+ equal_nan : bool, optional, defaults to False
+ Should nans be treated as equal in the comparison
use_unifrom: bool
Optional, When flag set to true,
random input data generated follows uniform distribution,
@@ -1454,20 +1548,6 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
'type_dict': {'concat_arg0': np.float32, 'concat_arg1': np.float32}}]
>>> check_consistency(sym, ctx_list)
"""
- if tol is None:
- tol = {np.dtype(np.float16): 1e-1,
- np.dtype(np.float32): 1e-3,
- np.dtype(np.float64): 1e-5,
- np.dtype(np.uint8): 0,
- np.dtype(np.int32): 0,
- np.dtype(np.int64): 0}
- elif isinstance(tol, numbers.Number):
- tol = {np.dtype(np.float16): tol,
- np.dtype(np.float32): tol,
- np.dtype(np.float64): tol,
- np.dtype(np.uint8): tol,
- np.dtype(np.int32): tol,
- np.dtype(np.int64): tol}
assert len(ctx_list) > 1
if isinstance(sym, Symbol):
@@ -1485,10 +1565,16 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
arg_params = {} if arg_params is None else arg_params
aux_params = {} if aux_params is None else aux_params
- for n, arr in exe_list[0].arg_dict.items():
+
+ # returns the least precise of two dtypes
+ def smaller_dtype(dt1, dt2):
+ return dt1 if dt2 is None or np.dtype(dt1).itemsize < np.dtype(dt2).itemsize else dt2
+
+ # It's important to assign random inputs in a deterministic order, for reproducibility.
+ for n, arr in _sorted_items(exe_list[0].arg_dict):
if n not in arg_params:
if use_uniform:
- arg_params[n] = np.random.uniform(low=-0.92, high=0.92,
+ arg_params[n] = np.random.uniform(low=-0.92 * scale, high=0.92 * scale,
size=arr.shape).astype(rand_type)
else:
arg_params[n] = np.random.normal(size=arr.shape,
@@ -1511,25 +1597,22 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
exe.forward(is_train=False)
dtypes = [np.dtype(exe.outputs[0].dtype) for exe in exe_list]
- max_idx = np.argmax(dtypes)
+ # Select the ground truth as the first model having the highest precision output[0]
+ gt_idx = np.argmax(dtypes)
gt = ground_truth
if gt is None:
- gt = exe_list[max_idx].output_dict.copy()
+ gt = exe_list[gt_idx].output_dict.copy()
for i, exe in enumerate(exe_list):
- if i == max_idx:
+ if i == gt_idx:
continue
- rtol = tol[dtypes[i]]
- atol = get_tolerance(rtol, ctx_list[i])
for name, arr in zip(output_names, exe.outputs):
- # Previously, the cast was to dtypes[i], but symbol may be mixed-precision,
- # so casting the ground truth to the actual output type seems more correct.
- gtarr = gt[name].astype(arr.dtype)
+ gtarr = gt[name]
try:
assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan)
except AssertionError as e:
- print('Predict Err: ctx %d vs ctx %d at %s'%(i, max_idx, name))
+ print('Predict Err: ctx %d vs ctx %d at %s'%(i, gt_idx, name))
traceback.print_exc()
if raise_on_err:
raise e
@@ -1538,33 +1621,55 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write',
# train
if grad_req != 'null':
+ # Perform forward()
for exe in exe_list:
exe.forward(is_train=True)
- exe.backward(exe.outputs)
+ # Use the first executor's output data, cast to the least precise dtype,
+ # as the gradient data to pass to all executor's backward() call.
+ least_precise_dtype = [out.dtype for out in exe_list[0].outputs]
+ for exe in exe_list:
+ least_precise_dtype = [smaller_dtype(out1.dtype, dt) \
+ for (out1, dt) in zip(exe.outputs, least_precise_dtype)]
+ golden_data_np = [out.astype(dt).asnumpy() \
+ for (out, dt) in zip(exe_list[0].outputs, least_precise_dtype)]
+ # Perform backward()
+ for exe in exe_list:
+ out_grads = [mx.nd.array(golden_np, ctx=exe._ctx,
+ dtype=out.dtype).tostype(out.stype)
+ for (golden_np, out) in zip(golden_data_np, exe.outputs)]
+ exe.backward(out_grads)
+
gt = ground_truth
if gt is None:
- gt = exe_list[max_idx].output_dict.copy()
+ gt = exe_list[gt_idx].output_dict.copy()
if grad_req != 'null':
- gt.update(exe_list[max_idx].grad_dict)
+ gt.update(exe_list[gt_idx].grad_dict)
for i, exe in enumerate(exe_list):
- if i == max_idx:
+ if i == gt_idx:
continue
- rtol = tol[dtypes[i]]
- atol = get_tolerance(rtol, ctx_list[i])
curr = zip(output_names + arg_names, exe.outputs + exe.grad_arrays)
for name, arr in curr:
if gt[name] is None:
assert arr is None, name
continue
- # Previous cast was to dtypes[i], but symbol may be mixed-precision,
- # so casting the ground truth to the actual output type seems more correct.
- gtarr = gt[name].astype(arr.dtype)
+ gtarr = gt[name]
try:
- assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan)
+ rt, at = rtol, atol
+ # If the primary data i/o type is float16, then the tolerance used when
+ # comparing a float32 input gradient (e.g. batchnorm gamma) should be float16.
+ smaller_arr_dtype = smaller_dtype(arr.dtype, dtypes[i])
+ smaller_gt_dtype = smaller_dtype(gtarr.dtype, dtypes[gt_idx])
+ if smaller_arr_dtype != arr.dtype or \
+ smaller_gt_dtype != gtarr.dtype:
+ rt, at = get_tols(arr.astype(smaller_arr_dtype),
+ gtarr.astype(smaller_gt_dtype), rtol, atol)
+ assert_almost_equal(arr, gtarr, rtol=rt, atol=at, equal_nan=equal_nan)
except AssertionError as e:
- print('Train Err: ctx %d vs ctx %d at %s'%(i, max_idx, name))
+ print('Train Err: {} {} ctx {} vs {} {} ctx {} at {}'.format(
+ np.dtype(arr.dtype).name, arr.ctx, i,
+ np.dtype(gtarr.dtype).name, gtarr.ctx, gt_idx, name))
traceback.print_exc()
if raise_on_err:
raise e
diff --git a/src/operator/linalg.h b/src/operator/linalg.h
index 291e251..3e82c6a 100644
--- a/src/operator/linalg.h
+++ b/src/operator/linalg.h
@@ -280,6 +280,14 @@ void linalg_batch_det_backward_helper(const Tensor<xpu, 3, DType>& LU,
const DType zero_det,
const mxnet::OpContext& ctx);
+#ifdef __CUDACC__
+#if CUDA_VERSION < 11000
+#define VERSION_ADJUSTED_TF32_MATH CUBLAS_DEFAULT_MATH
+#else
+#define VERSION_ADJUSTED_TF32_MATH CUBLAS_TF32_TENSOR_OP_MATH
+#endif
+#endif // __CUDACC__
+
#include "linalg_impl.h"
#endif // MXNET_OPERATOR_LINALG_H_
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index 104acd5..6d94f33 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -212,12 +212,15 @@ inline void linalg_gemm<gpu, float>(const Tensor<gpu, 2, float>& A,
#else
cublasDataType_t full_datatype = CUBLAS_DATA_FULL;
#endif
+ auto handle = Stream<gpu>::GetBlasHandle(s);
+ cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH);
CUBLAS_CALL(cublasSgemmEx(
- Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N),
+ handle, (tB ? CUBLAS_OP_T : CUBLAS_OP_N),
(tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0),
(tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, full_datatype, B.stride_,
A.dptr_, full_datatype, A.stride_, &beta, C.dptr_, full_datatype,
- C.stride_))
+ C.stride_));
+ CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode));
}
#else
@@ -235,13 +238,16 @@ void linalg_gemm_axis<gpu, DType>(const Tensor<gpu, 3, DType>& A, const Tensor<g
using mshadow::gpu; \
CHECK_NOTNULL(s); \
linalg_check_batch_size(A.size(1), B.size(1), C.size(1)); \
- CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
+ auto handle = Stream<gpu>::GetBlasHandle(s); \
+ cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \
+ CUBLAS_CALL(cublas##fname(handle, \
(tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
(tA ? CUBLAS_OP_T : CUBLAS_OP_N), \
C.size(2), C.size(0), (tB ? B.size(2) : B.size(0)), &alpha, \
B.dptr_, B.size(1)*B.stride_, B.stride_, \
A.dptr_, A.size(1)*A.stride_, A.stride_, &beta, \
C.dptr_, C.size(1)*C.stride_, C.stride_, A.size(1))) \
+ CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \
}
LINALG_GPU_GEMM_AXIS(SgemmStridedBatched, float)
LINALG_GPU_GEMM_AXIS(DgemmStridedBatched, double)
@@ -349,13 +355,22 @@ void linalg_gemm<gpu, mshadow::half::half_t>(const Tensor<gpu, 2, mshadow::half:
linalg_check_batch_size(A.size(0), B.size(0), C.size(0)); \
check_gemm(A[0], B[0], C[0], alpha, beta, tA, tB); \
using namespace mshadow::cuda; \
- CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
+ auto handle = Stream<gpu>::GetBlasHandle(s); \
+ cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \
+ CUBLAS_CALL(cublas##fname(handle, \
(tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
(tA ? CUBLAS_OP_T : CUBLAS_OP_N), \
C.size(2), C.size(1), (tB ? B.size(2) : B.size(1)), \
- &alpha, B.dptr_, B.stride_, B.size(1) * B.stride_, \
- A.dptr_, A.stride_, A.size(1) * A.stride_, \
- &beta, C.dptr_, C.stride_, C.size(1) * C.stride_, A.size(0))) \
+ &alpha, \
+ B.dptr_, B.stride_, \
+ static_cast<int64_t>(B.size(1) * B.stride_), \
+ A.dptr_, A.stride_, \
+ static_cast<int64_t>(A.size(1) * A.stride_), \
+ &beta, \
+ C.dptr_, C.stride_, \
+ static_cast<int64_t>(C.size(1) * C.stride_), \
+ A.size(0))) \
+ CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \
}
LINALG_GPU_BATCH_GEMM(DgemmStridedBatched, double)
@@ -380,7 +395,7 @@ void linalg_gemm<gpu, mshadow::half::half_t>(const Tensor<gpu, 2, mshadow::half:
using namespace mshadow::cuda;
auto cublas_math_mode =
- use_tensor_ops ? CUBLAS_TENSOR_OP_MATH : CUBLAS_DEFAULT_MATH;
+ use_tensor_ops ? CUBLAS_TENSOR_OP_MATH : VERSION_ADJUSTED_TF32_MATH;
auto previous_math_mode = SetCublasMathMode(blas_handle, cublas_math_mode);
// cublasGemmStridedBatchedEx is only supported for GPU with architecture
@@ -421,6 +436,8 @@ void linalg_gemm<gpu, mshadow::half::half_t>(const Tensor<gpu, 2, mshadow::half:
CHECK_NOTNULL(s); \
linalg_check_batch_size(A.size(0), B.size(0), C.size(0)); \
linalg_check_batch_size(A.size(2), B.size(2), C.size(2)); \
+ auto handle = Stream<gpu>::GetBlasHandle(s); \
+ cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \
for (index_t i = 0; i < A.size(2); ++i) { \
CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
(tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
@@ -430,6 +447,7 @@ void linalg_gemm<gpu, mshadow::half::half_t>(const Tensor<gpu, 2, mshadow::half:
A.dptr_+i*A.stride_, A.size(2) * A.stride_, A.size(1)*A.size(2)*A.stride_, &beta, \
C.dptr_+i*C.stride_, C.size(2) * C.stride_, C.size(1)*C.size(2)*C.stride_, A.size(0))) \
}\
+ SetCublasMathMode(handle, saved_math_mode); \
}
LINALG_GPU_BATCH_GEMM_AXIS(SgemmStridedBatched, float)
diff --git a/src/operator/nn/log_softmax.cc b/src/operator/nn/log_softmax.cc
index f3ef4ab..28ae8cf 100644
--- a/src/operator/nn/log_softmax.cc
+++ b/src/operator/nn/log_softmax.cc
@@ -58,6 +58,7 @@ static void LogSoftmaxGradComputeExCPU(const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
+ if (inputs[0].shape().Size() == 0U) return;
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
if (SupportMKLDNNLogSoftmax(param, inputs[1], outputs[0])) {
MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index b95e159..9b28b71 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -59,6 +59,7 @@ static void SoftmaxGradComputeExCPU(const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
+ if (inputs[0].shape().Size() == 0U) return;
const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
if (SupportMKLDNNSoftmax(param, inputs[1], outputs[0])) {
MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/src/operator/numpy/np_true_divide-inl.h b/src/operator/numpy/np_true_divide-inl.h
index 6e97511..ea0057b 100644
--- a/src/operator/numpy/np_true_divide-inl.h
+++ b/src/operator/numpy/np_true_divide-inl.h
@@ -59,15 +59,17 @@ void TrueDivideScalarCompute(const nnvm::NodeAttrs &attrs,
});
});
} else {
- CHECK_EQ(outputs[0].type_flag_, mxnet::common::GetDefaultDtype())
+ CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64)
<< "true_divide only supports float32 and float64"
" output when input's dtype is "
<< type_string(inputs[0].type_flag_);
- MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, {
- MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
- Kernel<op_with_req<OP, Req>, xpu>::Launch(
- s, data.Size(), out.dptr<float>(), data.dptr<DType>(),
- static_cast<float>(alpha));
+ MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, {
+ MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+ MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+ Kernel<op_with_req<OP, Req>, xpu>::Launch(
+ s, data.Size(), out.dptr<ODType>(), data.dptr<DType>(),
+ static_cast<ODType>(alpha));
+ });
});
});
}
diff --git a/tests/python/gpu/test_gluon_gpu.py b/tests/python/gpu/test_gluon_gpu.py
index ba7deae..e259b74 100644
--- a/tests/python/gpu/test_gluon_gpu.py
+++ b/tests/python/gpu/test_gluon_gpu.py
@@ -50,10 +50,9 @@ def check_rnn_layer(layer):
states = layer.begin_state(16)
co, cs = layer(x, states)
- # atol of 1e-6 required, as exposed by seed 2124685726
- assert_almost_equal(go, co, rtol=1e-2, atol=1e-6)
+ assert_almost_equal(go, co)
for g, c in zip(gs, cs):
- assert_almost_equal(g, c, rtol=1e-2, atol=1e-6)
+ assert_almost_equal(g, c)
@with_seed()
@@ -70,9 +69,9 @@ def check_rnn_layer_w_rand_inputs(layer):
states = layer.begin_state(16)
co, cs = layer(x, states)
- assert_almost_equal(go, co, rtol=1e-2, atol=1e-6)
+ assert_almost_equal(go, co)
for g, c in zip(gs, cs):
- assert_almost_equal(g, c, rtol=1e-2, atol=1e-6)
+ assert_almost_equal(g, c)
@with_seed()
@@ -485,6 +484,13 @@ def test_large_models():
# This in the past has given cudnnFind() trouble when it needed to allocate similar I/O's
# from the area carved out by the MXNET_GPU_MEM_POOL_RESERVE setting (by default 5%).
(free_mem_bytes, total_mem_bytes) = mx.context.gpu_memory_info(ctx.device_id)
+ # This test needs to be 'qualified' for use with each new larger memory size
+ largest_supported_total_mem_GB = 32
+ if (total_mem_bytes > largest_supported_total_mem_GB * 1024 * 1024 * 1024):
+ sys.stderr.write(
+ ' bypassing test due to too-large global memory of size {} ... '.format(total_mem_bytes))
+ return
+
start_size = tensor_size(0.20 * total_mem_bytes)
num_trials = 10
sys.stderr.write(
diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py
index 4272971..1d0d3f4 100644
--- a/tests/python/gpu/test_gluon_model_zoo_gpu.py
+++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py
@@ -89,7 +89,7 @@ def test_inference(model_name):
max_val = np.max(np.abs(cpu_out.asnumpy()))
gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val))
- assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val, rtol=1e-3, atol=1e-3)
+ assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val)
def get_nn_model(name):
if "densenet" in name:
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 9b96960..d088b44 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -23,6 +23,7 @@ import multiprocessing as mp
import mxnet as mx
import numpy as np
import pytest
+import itertools
from mxnet.test_utils import check_consistency, set_default_context, assert_almost_equal, assert_allclose
from mxnet.test_utils import check_symbolic_forward, check_symbolic_backward, discard_stderr
from mxnet.test_utils import default_context, rand_shape_2d, rand_ndarray, same
@@ -404,30 +405,20 @@ def test_batchnorm_with_type():
]
# V2, 2D
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_2D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_2D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_2D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_2D)
+ bools = [False, True]
+ for fix_gamma, cudnn_off in itertools.product(bools, bools):
+ sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off)
+ check_consistency(sym, ctx_list_v2_2D)
# V2, 1D
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_1D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_1D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_1D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_1D)
- #
- # # V2, 3D
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_3D)
- sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True)
- check_consistency(sym, ctx_list_v2_3D)
+ for fix_gamma, cudnn_off in itertools.product(bools, bools):
+ sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off)
+ check_consistency(sym, ctx_list_v2_1D)
+
+ # V2, 3D
+ for fix_gamma, cudnn_off in itertools.product(bools, [True,]):
+ sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off)
+ check_consistency(sym, ctx_list_v2_3D)
@with_seed()
@@ -529,9 +520,9 @@ def test_convolution_with_type():
np.dtype(np.float64): 1e-5,
np.dtype(np.uint8): 0,
np.dtype(np.int32): 0}
- check_consistency(sym, ctx_list, tol=tol)
+ check_consistency(sym, ctx_list, rtol=tol, atol=tol)
# test ability to turn off training on bias
- check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, tol=tol)
+ check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, rtol=tol, atol=tol)
# Apply N symbols against each of M contexts, checking that all NxM combinations match.
@@ -616,7 +607,6 @@ def test_conv_deconv_guards():
# Test cases for convolution and deconvolution via strided fft. Ensure that the framework
# guards against problematic CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING in cuDNN [7.3.1,7.5)
# see https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_750.html#rel_750
- tol = 1e-1
for (op, opname) in [(mx.sym.Convolution, 'conv'), (mx.sym.Deconvolution, 'deconv')]:
dataname = opname + '_data'
ctx = {'ctx': mx.gpu(0), dataname: (32, 32, 64, 64), 'type_dict': {dataname: np.float32}}
@@ -631,7 +621,7 @@ def test_conv_deconv_guards():
try:
sym = op(**test_case_args)
sym_no_cudnn = op(cudnn_off=True, **test_case_args)
- check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=tol)
+ check_consistency([sym, sym_no_cudnn], [ctx, ctx], scale=0.1)
except:
print('Test failure of mx.sym.{} with args: {}'.format(op.__name__, test_case_args))
raise
@@ -655,7 +645,7 @@ def _conv_with_num_streams(seed):
cudnn_off=True, name='conv')
try:
# tol can be pretty high- we're looking for a large diff due to garbaged workspace
- check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=1e-2)
+ check_consistency([sym, sym_no_cudnn], [ctx, ctx], rtol=1e-2, atol=1e-2)
except:
print('Failing conv size = {}'.format(size))
raise
@@ -678,20 +668,19 @@ def test_convolution_multiple_streams():
@pytest.mark.serial
def test_convolution_large_c():
problematic_c = 64 * 1024
- # The convolution accumulates many values, so set large tolerances.
- tol = {np.dtype(np.float32): 1,
- np.dtype(np.float64): 1}
+ # The convolution accumulates many values, so scale the input magnitude.
+ scale = 0.1
def test_1D_with_width(width, grad_req):
ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float32}},
{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float64}}]
sym = mx.sym.Convolution(layout='NCW', num_filter=8, kernel=(2,), name='conv')
- check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req)
+ check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)
def test_2D_with_width(width, grad_req):
ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float32}},
{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float64}}]
sym = mx.sym.Convolution(layout='NCHW', num_filter=4, kernel=(2,2), name='conv')
- check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req)
+ check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)
# Run with different data tensor shapes to run cudnnFind() multiple times.
# First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
@@ -709,20 +698,19 @@ def test_convolution_large_c():
@pytest.mark.serial
def test_deconvolution_large_c():
problematic_c = 64 * 1024
- # The deconvolution accumulates many values, so set large tolerances.
- tol = {np.dtype(np.float32): 1,
- np.dtype(np.float64): 1}
+ # The deconvolution accumulates many values, so scale the input magnitude.
+ scale = 0.1
def test_1D_with_width(width, grad_req):
ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float32}},
{'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float64}}]
sym = mx.sym.Deconvolution(layout='NCW', num_filter=problematic_c, kernel=(2,), name='deconv')
- check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req)
+ check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)
def test_2D_with_width(width, grad_req):
ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float32}},
{'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float64}}]
sym = mx.sym.Deconvolution(layout='NCHW', num_filter=problematic_c, kernel=(2,2), name='deconv')
- check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req)
+ check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale)
# Run with different data tensor shapes to run cudnnFind() multiple times.
# First, populate algo and op caches with models that always use cudnnFind() (req == 'write').
@@ -831,8 +819,8 @@ def test_deconvolution_with_type():
np.dtype(np.float64): 1e-5,
np.dtype(np.uint8): 0,
np.dtype(np.int32): 0}
- check_consistency(sym, ctx_list, tol=tol)
- check_consistency(sym, ctx_list, tol=tol, grad_req="add")
+ check_consistency(sym, ctx_list, rtol=tol, atol=tol)
+ check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add")
# 2D deconvolution
sym = mx.sym.Deconvolution(num_filter=2, kernel=(3,3), name='deconv')
@@ -847,8 +835,8 @@ def test_deconvolution_with_type():
np.dtype(np.float64): 1e-5,
np.dtype(np.uint8): 0,
np.dtype(np.int32): 0}
- check_consistency(sym, ctx_list, tol=tol)
- check_consistency(sym, ctx_list, tol=tol, grad_req="add")
+ check_consistency(sym, ctx_list, rtol=tol, atol=tol)
+ check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add")
@with_seed()
@@ -931,10 +919,11 @@ def test_bilinear_sampler_with_type():
def test_grid_generator_with_type():
data = mx.sym.Variable('data')
sym = mx.sym.GridGenerator(data=data, transform_type='affine', target_shape=(20, 20))
+ scale = 1
ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}},
{'ctx': mx.cpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}]
- check_consistency(sym, ctx_list)
- check_consistency(sym, ctx_list, grad_req="add")
+ check_consistency(sym, ctx_list, scale=scale)
+ check_consistency(sym, ctx_list, scale=scale, grad_req="add")
sym = mx.sym.GridGenerator(data=data, transform_type='warp', target_shape=(20, 20))
ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}},
{'ctx': mx.cpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}]
@@ -1080,7 +1069,7 @@ def test_pooling_versions():
pool_op))
sym_list.append(sym)
- check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), tol=tol)
+ check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol)
def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_include_pad=True,
tol=None):
@@ -1239,7 +1228,7 @@ def test_flatten_slice_after_conv():
ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}},
{'ctx': mx.cpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}]
- check_consistency(slice_sym, ctx_list)
+ check_consistency(slice_sym, ctx_list, scale=0.5)
@with_seed()
@@ -1545,7 +1534,7 @@ def test_embedding_with_type():
'type_dict': {'embedding_data': data_type, 'embedding_weight': weight_type}})
arg_params = {'embedding_data': np.random.randint(low=-low_pad, high=V+high_pad, size=(N,))}
check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'},
- arg_params=arg_params)
+ arg_params=arg_params, scale=0.1)
data_types = [np.float16, np.float32, np.float64, np.int32]
weight_types = [np.float16, np.float32, np.float64]
@@ -1678,7 +1667,7 @@ def test_deformable_psroipooling_with_type():
'deformable_psroipool_trans': np.float16}},
]
- check_consistency(sym, ctx_list, scale=0.1, tol=tol,
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
grad_req={'deformable_psroipool_data': 'write',
'deformable_psroipool_rois': 'null',
'deformable_psroipool_trans': 'write'}, arg_params=arg_params)
@@ -1710,9 +1699,9 @@ def test_deformable_convolution_with_type():
'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}},
]
- check_consistency(sym, ctx_list, scale=0.1, tol=tol)
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
# test ability to turn off training on bias
- check_consistency(sym, ctx_list, scale=0.1, tol=tol,
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol,
grad_req={'deformable_conv_data': 'write',
'deformable_conv_offset': 'write',
'deformable_conv_weight': 'write',
@@ -1745,7 +1734,7 @@ def test_deformable_convolution_options():
'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}},
]
sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), pad=(1,1), name='deformable_conv')
- check_consistency(sym, ctx_list, scale=0.1, tol=tol)
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
# Stride > 1
ctx_list = [{'ctx': mx.gpu(0),
@@ -1766,7 +1755,7 @@ def test_deformable_convolution_options():
'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}},
]
sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), stride=(2,2), name='deformable_conv')
- check_consistency(sym, ctx_list, scale=0.1, tol=tol)
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
# Dilate > 1
ctx_list = [{'ctx': mx.gpu(0),
@@ -1787,7 +1776,7 @@ def test_deformable_convolution_options():
'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}},
]
sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), dilate=(2,2), name='deformable_conv')
- check_consistency(sym, ctx_list, scale=0.1, tol=tol)
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
# Deformable group > 1
ctx_list = [{'ctx': mx.gpu(0),
@@ -1808,7 +1797,7 @@ def test_deformable_convolution_options():
'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}},
]
sym = mx.sym.contrib.DeformableConvolution(num_filter=4, kernel=(3,3), num_deformable_group=2, name='deformable_conv')
- check_consistency(sym, ctx_list, scale=0.1, tol=tol)
+ check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol)
def check_rnn_layer(layer):
diff --git a/tests/python/gpu/test_profiler_gpu.py b/tests/python/gpu/test_profiler_gpu.py
index 11a0b7d..89eb425 100644
--- a/tests/python/gpu/test_profiler_gpu.py
+++ b/tests/python/gpu/test_profiler_gpu.py
@@ -27,6 +27,8 @@ sys.path.insert(0, os.path.join(curr_path, '../unittest'))
# They will be detected by test framework, as long as the current file has a different filename
from test_profiler import *
+# Test seen to crash pytest worker during development of https://github.com/apache/incubator-mxnet/pull/18694
+del test_aggregate_duplication
def test_gpu_memory_profiler_symbolic():
iter_num = 5
diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py
index 0219616..331f32d 100644
--- a/tests/python/unittest/common.py
+++ b/tests/python/unittest/common.py
@@ -222,11 +222,13 @@ def with_seed(seed=None):
try:
orig_test(*args, **kwargs)
except:
- # With exceptions, repeat test_msg at INFO level to be sure it's seen.
- if log_level < logging.INFO:
- logger.info(test_msg)
+ # With exceptions, repeat test_msg at WARNING level to be sure it's seen.
+ if log_level < logging.WARNING:
+ logger.warning(test_msg)
raise
finally:
+ # Provide test-isolation for any test having this decorator
+ mx.nd.waitall()
np.random.set_state(post_test_state)
return test_new
return test_helper
@@ -285,7 +287,7 @@ def setup_module():
seed = np.random.randint(0, np.iinfo(np.int32).max)
else:
seed = int(module_seed_str)
- logger.warn('*** module-level seed is set: all tests running deterministically ***')
+ logger.warning('*** module-level seed is set: all tests running deterministically ***')
logger.info('Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=%s to reproduce.', seed)
np.random.seed(seed)
mx.random.seed(seed)
@@ -293,7 +295,7 @@ def setup_module():
# The MXNET_TEST_SEED environment variable will override MXNET_MODULE_SEED for tests with
# the 'with_seed()' decoration. Inform the user of this once here at the module level.
if os.getenv('MXNET_TEST_SEED') is not None:
- logger.warn('*** test-level seed set: all "@with_seed()" tests run deterministically ***')
+ logger.warning('*** test-level seed set: all "@with_seed()" tests run deterministically ***')
def teardown_module():
@@ -359,13 +361,13 @@ def retry(n):
@functools.wraps(orig_test)
def test_new(*args, **kwargs):
"""Wrapper for tests function."""
- for _ in range(n):
+ for i in range(n):
try:
orig_test(*args, **kwargs)
+ return
except AssertionError as e:
- err = e
- continue
- return
- raise err
+ if i == n-1:
+ raise e
+ mx.nd.waitall()
return test_new
return test_helper
diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py
index cc1e87a..ad0601c 100644
--- a/tests/python/unittest/test_autograd.py
+++ b/tests/python/unittest/test_autograd.py
@@ -440,6 +440,7 @@ def test_grad_with_stype():
check_grad_with_stype(stype, grad_stype, grad_stype)
@with_seed()
+@pytest.mark.garbage_expected
def test_sparse_dot_grad():
def check_sparse_dot_grad(rhs):
lhs = rand_ndarray((2, 8), 'csr')
diff --git a/tests/python/unittest/test_contrib_gluon_data_vision.py b/tests/python/unittest/test_contrib_gluon_data_vision.py
index d2e38d6..166b07f 100644
--- a/tests/python/unittest/test_contrib_gluon_data_vision.py
+++ b/tests/python/unittest/test_contrib_gluon_data_vision.py
@@ -19,7 +19,7 @@ import mxnet as mx
import numpy as np
import scipy.ndimage
from mxnet.test_utils import *
-from common import assertRaises, with_seed
+from common import assertRaises, with_seed, setup_module, teardown_module
import shutil
import tempfile
import unittest
@@ -63,6 +63,7 @@ class TestImage(unittest.TestCase):
print("cleanup {}".format(self.IMAGES_DIR))
shutil.rmtree(self.IMAGES_DIR)
+ @with_seed()
def test_imageiter(self):
im_list = [[np.random.randint(0, 5), x] for x in self.IMAGES]
os.makedirs('./data', exist_ok=True)
@@ -95,6 +96,7 @@ class TestImage(unittest.TestCase):
for batch in it:
pass
+ @with_seed()
def test_image_bbox_iter(self):
im_list = [_generate_objects() + [x] for x in self.IMAGES]
det_iter = mx.gluon.contrib.data.vision.ImageBboxDataLoader(2, (3, 300, 300), imglist=im_list, path_root='')
@@ -131,6 +133,7 @@ class TestImage(unittest.TestCase):
]
+ @with_seed()
def test_bbox_augmenters(self):
# only test if all augmenters will work
# TODO(Joshua Zhang): verify the augmenter outputs
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 52bab2b..588f5c1 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -22,7 +22,7 @@ import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.base import py_str, MXNetError
-from mxnet.test_utils import assert_almost_equal
+from mxnet.test_utils import assert_almost_equal, default_context
from mxnet.util import is_np_array
from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
from mxnet.test_utils import use_np
@@ -810,7 +810,7 @@ def test_sync_batchnorm():
input2grad.asnumpy(), atol=atol, rtol=rtol)
cfgs = [(1, False)]
- num_gpus = mx.context.num_gpus()
+ num_gpus = 0 if default_context().device_type != 'gpu' else mx.context.num_gpus()
batch_size = 24
for i in range(1, num_gpus + 1):
if batch_size % i == 0:
diff --git a/tests/python/unittest/test_gluon_probability_v1.py b/tests/python/unittest/test_gluon_probability_v1.py
index 92721f6..b5b1644 100644
--- a/tests/python/unittest/test_gluon_probability_v1.py
+++ b/tests/python/unittest/test_gluon_probability_v1.py
@@ -417,7 +417,7 @@ def test_gluon_half_cauchy_v1():
# Test icdf
for shape, hybridize in itertools.product(shapes, [True, False]):
scale = np.random.uniform(0.5, 1.5, shape)
- samples = np.random.uniform(size=shape)
+ samples = np.random.uniform(size=shape, high=1.0-1e-4)
net = TestHalfCauchy("icdf")
if hybridize:
net.hybridize()
@@ -1727,15 +1727,15 @@ def test_gluon_mvn_v1():
net.hybridize()
mx_out = net(loc, cov_param, samples)
assert mx_out.shape == samples.shape[:-1]
- # Select the first element in the batch, because scipy does not support batching.
- loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
- sigma_t = sigma.reshape(-1, event_shape,
- event_shape)[0].asnumpy()
if mx_out.shape == ():
mx_out_t = mx_out.asnumpy()
else:
mx_out_t = mx_out.flatten()[0].asnumpy()
samples_t = samples.reshape(-1, event_shape).asnumpy()[0]
+ # Select the first element in the batch, because scipy does not support batching.
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
+ sigma_t = sigma.reshape(-1, event_shape,
+ event_shape)[0].asnumpy()
scipy_mvn = ss.multivariate_normal(loc_t, sigma_t)
ss_out = scipy_mvn.logpdf(samples_t)
assert_almost_equal(mx_out_t, ss_out, atol=1e-4,
@@ -1758,14 +1758,14 @@ def test_gluon_mvn_v1():
net.hybridize()
mx_out = net(loc, cov_param)
assert mx_out.shape == sigma.shape[:-2]
- # Select the first element in the batch, because scipy does not support batching.
- loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
- sigma_t = sigma.reshape(-1, event_shape,
- event_shape)[0].asnumpy()
if mx_out.shape == ():
mx_out_t = mx_out.asnumpy()
else:
mx_out_t = mx_out.flatten()[0].asnumpy()
+ # Select the first element in the batch, because scipy does not support batching.
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
+ sigma_t = sigma.reshape(-1, event_shape,
+ event_shape)[0].asnumpy()
scipy_mvn = ss.multivariate_normal(loc_t, sigma_t)
ss_out = scipy_mvn.entropy()
assert_almost_equal(mx_out_t, ss_out, atol=1e-4,
@@ -2084,7 +2084,7 @@ def test_gluon_kl_v1():
# exponential, geometric
for dist in [mgp.Exponential, mgp.Geometric]:
for shape in shapes:
- def s(): return np.random.uniform(size=shape)
+ def s(): return np.random.uniform(size=shape, low=1e-3)
_test_zero_kl(_dist_factory(dist, s), shape)
if monte_carlo_test:
_test_monte_carlo(_dist_factory(dist, s),
diff --git a/tests/python/unittest/test_gluon_probability_v2.py b/tests/python/unittest/test_gluon_probability_v2.py
index 9a36b4f..d75aa69 100644
--- a/tests/python/unittest/test_gluon_probability_v2.py
+++ b/tests/python/unittest/test_gluon_probability_v2.py
@@ -417,7 +417,7 @@ def test_gluon_half_cauchy():
# Test icdf
for shape, hybridize in itertools.product(shapes, [True, False]):
scale = np.random.uniform(0.5, 1.5, shape)
- samples = np.random.uniform(size=shape)
+ samples = np.random.uniform(size=shape, high=1.0-1e-4)
net = TestHalfCauchy("icdf")
if hybridize:
net.hybridize()
@@ -1727,14 +1727,14 @@ def test_gluon_mvn():
net.hybridize()
mx_out = net(loc, cov_param, samples)
assert mx_out.shape == samples.shape[:-1]
- # Select the first element in the batch, because scipy does not support batching.
- loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
- sigma_t = sigma.reshape(-1, event_shape,
- event_shape)[0].asnumpy()
if mx_out.shape == ():
mx_out_t = mx_out.asnumpy()
else:
mx_out_t = mx_out.asnumpy().flatten()[0]
+ # Select the first element in the batch, because scipy does not support batching.
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
+ sigma_t = sigma.reshape(-1, event_shape,
+ event_shape)[0].asnumpy()
samples_t = samples.reshape(-1, event_shape).asnumpy()[0]
scipy_mvn = ss.multivariate_normal(loc_t, sigma_t)
ss_out = scipy_mvn.logpdf(samples_t)
@@ -1758,14 +1758,14 @@ def test_gluon_mvn():
net.hybridize()
mx_out = net(loc, cov_param)
assert mx_out.shape == sigma.shape[:-2]
- # Select the first element in the batch, because scipy does not support batching.
- loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
- sigma_t = sigma.reshape(-1, event_shape,
- event_shape)[0].asnumpy()
if mx_out.shape == ():
mx_out_t = mx_out.asnumpy()
else:
mx_out_t = mx_out.asnumpy().flatten()[0]
+ # Select the first element in the batch, because scipy does not support batching.
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy()
+ sigma_t = sigma.reshape(-1, event_shape,
+ event_shape)[0].asnumpy()
scipy_mvn = ss.multivariate_normal(loc_t, sigma_t)
ss_out = scipy_mvn.entropy()
assert_almost_equal(mx_out_t, ss_out, atol=1e-4,
@@ -2081,10 +2081,11 @@ def test_gluon_kl():
_dist_factory(dist, rate),
repeated_times)
+
# exponential, geometric
for dist in [mgp.Exponential, mgp.Geometric]:
for shape in shapes:
- def s(): return np.random.uniform(size=shape)
+ def s(): return np.random.uniform(size=shape, low=1e-3)
_test_zero_kl(_dist_factory(dist, s), shape)
if monte_carlo_test:
_test_monte_carlo(_dist_factory(dist, s),
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 2aab5c0..a01746e 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -24,7 +24,7 @@ import pickle as pkl
import random
import functools
import pytest
-from common import with_seed, assertRaises, TemporaryDirectory
+from common import with_seed, assertRaises, TemporaryDirectory, setup_module, teardown_module
from mxnet.test_utils import almost_equal
from mxnet.test_utils import assert_almost_equal, assert_exception
from mxnet.test_utils import default_context
diff --git a/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py b/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py
index 9c9c7fd..8c3e76a 100644
--- a/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py
+++ b/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py
@@ -19,7 +19,7 @@ import mxnet as mx
import numpy as np
import scipy.ndimage
from mxnet.test_utils import *
-from common import assertRaises, with_seed
+from common import assertRaises, with_seed, setup_module, teardown_module
import shutil
import tempfile
import unittest
@@ -63,10 +63,11 @@ class TestImage(unittest.TestCase):
print("cleanup {}".format(self.IMAGES_DIR))
shutil.rmtree(self.IMAGES_DIR)
+ @with_seed()
@use_np
def test_imageiter(self):
im_list = [[np.random.randint(0, 5), x] for x in self.IMAGES]
- fname = './data/test_imageiter.lst'
+ fname = './data/test_numpy_imageiter.lst'
file_list = ['\t'.join([str(k), str(np.random.randint(0, 5)), x])
for k, x in enumerate(self.IMAGES)]
with open(fname, 'w') as f:
@@ -95,6 +96,7 @@ class TestImage(unittest.TestCase):
for batch in it:
pass
+ @with_seed()
@use_np
def test_image_bbox_iter(self):
im_list = [_generate_objects() + [x] for x in self.IMAGES]
@@ -110,7 +112,7 @@ class TestImage(unittest.TestCase):
pass
# test file list with last batch handle
- fname = './data/test_imagedetiter.lst'
+ fname = './data/test_numpy_imagedetiter.lst'
im_list = [[k] + _generate_objects() + [x] for k, x in enumerate(self.IMAGES)]
with open(fname, 'w') as f:
for line in im_list:
@@ -130,6 +132,7 @@ class TestImage(unittest.TestCase):
path_imglist=fname, path_root='', last_batch='keep')
]
+ @with_seed()
@use_np
def test_bbox_augmenters(self):
# only test if all augmenters will work
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 8b50fc4..3b97864 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -29,7 +29,7 @@ from mxnet import np
from mxnet.test_utils import assert_almost_equal
from mxnet.test_utils import use_np
from mxnet.test_utils import is_op_runnable
-from common import assertRaises, with_seed
+from common import assertRaises, with_seed, random_seed
from mxnet.numpy_dispatch_protocol import with_array_function_protocol, with_array_ufunc_protocol
from mxnet.numpy_dispatch_protocol import _NUMPY_ARRAY_FUNCTION_LIST, _NUMPY_ARRAY_UFUNC_LIST
@@ -517,8 +517,8 @@ def _add_workload_linalg_cholesky():
dtypes = (np.float32, np.float64)
for shape, dtype in itertools.product(shapes, dtypes):
- _np.random.seed(1)
- a = _np.random.randn(*shape)
+ with random_seed(1):
+ a = _np.random.randn(*shape)
t = list(range(len(shape)))
t[-2:] = -1, -2
@@ -2898,7 +2898,6 @@ def _add_workload_unwrap():
phase[3:] += np.pi
phase_s = np.vstack((phase,phase))
OpArgMngr.add_workload('unwrap', phase)
- print(phase_s.shape)
OpArgMngr.add_workload('unwrap', phase_s, axis=1)
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 88ad77f..edc41f9 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -30,7 +30,7 @@ from mxnet import np, npx
from mxnet.gluon import HybridBlock
from mxnet.base import MXNetError
from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray
-from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like
+from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like, effective_dtype
from mxnet.test_utils import new_matrix_with_real_eigvals_nd
from mxnet.test_utils import new_sym_matrix_with_real_eigvals_nd
from common import assertRaises, with_seed, retry, xfail_when_nonstandard_decimal_separator
@@ -1849,15 +1849,18 @@ def test_npx_batch_norm(shape, fix_gamma, cudnn_off, output_mean_var):
running_mean = running_mean * momentum + \
data_mean_flat * (1 - momentum)
+
+ m = _np.prod(shape) / shape[axis]
+ # cudnn uses m-1 in the denominator of its sample variance calculation, not m
+ sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1)
running_var = running_var * momentum + \
- data_var_flat * (1 - momentum)
+ data_var_flat * sample_var_adjust * (1 - momentum)
W = bn_gamma.reshape(expand_shape)
dnx = ograd * W
xsm = data - data_mean
nd = 1.0 / np.sqrt(data_var + epsilon)
nx = xsm * nd
- m = _np.prod(shape) / shape[axis]
dvar = (dnx * xsm).sum(axis=reduce_axis, keepdims=True,
) * (-0.5) * np.power(nd, 3)
dmean = -nd * dnx.sum(axis=reduce_axis, keepdims=True) - \
@@ -1966,6 +1969,7 @@ def test_npx_softmax():
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)
mx_out.backward()
+ mx_a.grad.wait_to_read()
assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)
@@ -4608,7 +4612,7 @@ def test_np_random_grad():
scale = np.ones(scale)
mx_out = getattr(np.random, op_name)(loc, scale)
np_out = getattr(_np.random, op_name)(loc, scale)
- assert_almost_equal(mx_out.asnumpy().shape, np_out.shape)
+ assert mx_out.asnumpy().shape == np_out.shape
@with_seed()
@@ -4654,7 +4658,7 @@ def test_np_lognormal_grad():
for ((shape1, shape2), out_shape) in zip(param_shape, output_shapes):
mx_out = np.random.lognormal(np.zeros(shape1), np.ones(shape2), out_shape)
np_out = _np.random.lognormal(np.zeros(shape1).asnumpy(), np.ones(shape2).asnumpy(), out_shape)
- assert_almost_equal(mx_out.asnumpy().shape, np_out.shape)
+ assert mx_out.asnumpy().shape == np_out.shape
def _test_lognormal_exception(sigma):
output = np.random.lognormal(sigma=sigma).asnumpy()
@@ -4913,7 +4917,7 @@ def test_np_random_rayleigh():
with mx.autograd.record():
mx_out = test_rayleigh(scale)
np_out = _np.random.rayleigh(scale = scale.asnumpy(), size = shape)
- assert_almost_equal(np_out.shape, mx_out.shape)
+ assert np_out.shape == mx_out.shape
mx_out.backward()
assert scale.grad.shape == shape
assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5)
@@ -4921,7 +4925,7 @@ def test_np_random_rayleigh():
for shape in shapes:
mx_out = np.random.rayleigh(np.array([1]), shape)
np_out = _np.random.rayleigh(np.array([1]).asnumpy(), shape)
- assert_almost_equal(mx_out.asnumpy().shape, np_out.shape)
+ assert mx_out.asnumpy().shape == np_out.shape
def _test_rayleigh_exception(scale):
output = np.random.rayleigh(scale=scale).asnumpy()
@@ -4954,7 +4958,7 @@ def test_np_exponential():
with mx.autograd.record():
mx_out = test_exponential_grad(scale)
np_out = _np.random.exponential(scale = scale.asnumpy(), size = out_shape)
- assert_almost_equal(np_out.shape, mx_out.shape)
+ assert np_out.shape == mx_out.shape
mx_out.backward()
assert scale.grad.shape == out_shape
assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5)
@@ -5724,6 +5728,8 @@ def test_np_linalg_svd():
data_np = _np.random.uniform(-10.0, 10.0, shape)
data_np = _np.array(data_np, dtype=dtype)
data = np.array(data_np, dtype=dtype)
+ if effective_dtype(data) == np.dtype(np.float16):
+ continue
data.attach_grad()
with mx.autograd.record():
ret = test_svd(data)
@@ -6115,7 +6121,7 @@ def test_np_linalg_solve():
print(e)
else:
assert x.shape == x_expected.shape
- assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(x, x_expected)
def newInvertibleMatrix_2D(shape, max_cond=4):
while 1:
@@ -6155,7 +6161,6 @@ def test_np_linalg_solve():
nrhs = (-1, 0, 1, 2, 3)
dtypes = ['float32', 'float64']
for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs):
- rtol, atol =1e-2, 1e-4
test_solve = TestSolve()
if hybridize:
test_solve.hybridize()
@@ -6189,8 +6194,8 @@ def test_np_linalg_solve():
mx.autograd.backward(mx_out)
b_backward_expected = get_grad_b(a.asnumpy(), mx_out.asnumpy())
a_backward_expected = -_np.matmul(b_backward_expected, _np.swapaxes(mx_out, -1, -2).asnumpy())
- assert_almost_equal(a.grad.asnumpy(), a_backward_expected, rtol=rtol, atol=atol)
- assert_almost_equal(b.grad.asnumpy(), b_backward_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(a.grad, a_backward_expected)
+ assert_almost_equal(b.grad, b_backward_expected)
# check imperative once again
mx_out = np.linalg.solve(a, b)
@@ -6215,7 +6220,7 @@ def test_np_linalg_tensorinv():
print(e)
else:
assert inv_a.shape == inv_a_expected.shape
- assert_almost_equal(inv_a.asnumpy(), inv_a_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(inv_a, inv_a_expected)
def newInvertibleMatrix_2D(shape, max_cond=4):
while 1:
@@ -6258,11 +6263,6 @@ def test_np_linalg_tensorinv():
]
dtypes = ['float32', 'float64']
for hybridize, shape, dtype, in itertools.product([False, True], shapes, dtypes):
- rtol = 1e-3
- atol = 1e-5
- if dtype == 'float32':
- rtol = 1e-2
- atol = 1e-4
ind = shape[0]
test_tensorinv = TestTensorinv(ind=ind)
if hybridize:
@@ -6290,7 +6290,7 @@ def test_np_linalg_tensorinv():
if 0 not in mx_out.shape:
mx.autograd.backward(mx_out)
grad_A_expected = get_grad_A(a.asnumpy(), ind)
- assert_almost_equal(a.grad.asnumpy(), grad_A_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(a.grad, grad_A_expected)
# check imperative once again
mx_out = np.linalg.tensorinv(a, ind)
@@ -6343,7 +6343,7 @@ def test_np_linalg_tensorsolve():
print(e)
else:
assert x.shape == x_expected.shape
- assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(x, x_expected)
def shapeInfer(a_shape, b_shape, axes=None):
# b_shape - Right-hand tensor shape, which can be of any shape.
@@ -6405,8 +6405,6 @@ def test_np_linalg_tensorsolve():
for hybridize in [True, False]:
for dtype in dtypes:
for a_shape, b_shape, axes in shapes:
- rtol = 1e-2 if dtype == 'float32' else 1e-3
- atol = 1e-4 if dtype == 'float32' else 1e-5
test_tensorsolve = TestTensorsolve(axes)
if hybridize:
test_tensorsolve.hybridize()
@@ -6443,8 +6441,8 @@ def test_np_linalg_tensorsolve():
mx.autograd.backward(mx_out)
grad_a_expected, grad_b_expected = get_tensorsolve_backward(
a.asnumpy(), b.asnumpy(), mx_out.asnumpy(), a_axes, a_origin_axes, a_trans_shape)
- assert_almost_equal(a.grad.asnumpy(), grad_a_expected, rtol=rtol, atol=atol)
- assert_almost_equal(b.grad.asnumpy(), grad_b_expected, rtol=rtol, atol=atol)
+ assert_almost_equal(a.grad, grad_a_expected)
+ assert_almost_equal(b.grad, grad_b_expected)
# check imperative once again
mx_out = test_tensorsolve(a, b)
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 1578e14..344cb72 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -458,21 +458,26 @@ def test_symbol_pow():
@with_seed()
def test_fully_connected():
+ # Create data of given shape as a uniform distribution centered on 0.0
+ def random_data(shape, dtype=np.float32):
+ return mx.nd.random.uniform(low=-0.5,
+ high=0.5, shape=shape, dtype=dtype)
data = mx.sym.var("data")
fc_weight = mx.sym.var("weight")
fc_bias = mx.sym.var("bias")
fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc')
- data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32)
- fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32)
- fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32)
- fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32)
+
+ data = random_data(shape=(5, 5, 5, 13))
+ fc_weight = random_data(shape=(10, 325))
+ fc_bias = random_data(shape=(10))
+ fc_bias2 = random_data(shape=(10, 1))
+
data_np = data.asnumpy().reshape(5, 325)
fc_weight_np = np.transpose(fc_weight.asnumpy())
fc_bias_np = fc_bias.asnumpy()
res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy()
check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res})
- check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np},
- numeric_eps=1e-2, rtol=1e-4, atol=1e-2)
+ check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np})
# TODO: Fix Bug #15032 when bias has ndim > 1
#check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res})
@@ -1544,15 +1549,18 @@ def test_batchnorm(op_name, shape, fix_gamma, cudnn_off, output_mean_var):
running_mean = running_mean * momentum + \
data_mean_flat * (1 - momentum)
+
+ m = np.prod(shape) / shape[axis]
+ # cudnn uses m-1 in the denominator of its sample variance calculation, not m
+ sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1)
running_var = running_var * momentum + \
- data_var_flat * (1 - momentum)
+ data_var_flat * sample_var_adjust * (1 - momentum)
W = bn_gamma.reshape(expand_shape)
dnx = ograd * W
xsm = data - data_mean
nd = 1.0 / mx.nd.sqrt(data_var + epsilon)
nx = xsm * nd
- m = np.prod(shape) / shape[axis]
dvar = (dnx * xsm).sum(axis=axis, keepdims=True,
exclude=True) * (-0.5) * mx.nd.power(nd, 3)
dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \
@@ -2478,13 +2486,13 @@ def test_reduce():
args_grad={'a': grad_nd})
net.forward(is_train=True)
- equal_forward = almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, 1E-4, 1E-4)
- assert equal_forward
+ # check forward
+ assert_almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, rtol=1e-4, atol=1e-4)
net.backward(out_grads=mx.nd.array(outgrad_npy))
bc_grad_groundtruth = np.broadcast_to(grad_groundtruth, grad_nd.shape)
- equal_backward = almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, 1E-4, 1E-4)
- assert equal_backward
+ # check backward
+ assert_almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, rtol=1e-4, atol=1e-4)
test_none_axis = [True, False]
for test_none in test_none_axis:
@@ -4074,7 +4082,7 @@ def test_order():
out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=a_npy.size, is_ascend=is_ascend)
else:
out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=5, is_ascend=is_ascend)
- check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx)
+ check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx)
check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy])
b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5)
@@ -4122,7 +4130,7 @@ def test_order():
for is_ascend in [True, False]:
b = mx.sym.topk(a, axis=axis, is_ascend=is_ascend, ret_typ="value", k=k)
out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=k, is_ascend=is_ascend)
- check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx)
+ check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx)
check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy])
b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5)
@@ -4285,7 +4293,7 @@ def test_grid_generator():
# check forward
exe.arg_dict['affine'][:] = np.array([[1.0,0,0,0,1.0,0]])
exe.forward(is_train=True)
- output = exe.outputs[0].asnumpy()
+ output = exe.outputs[0]
output[0,0,:,:] = (output[0,0,:,:] + 1) * (target_shape[1] - 1) / 2.0
output[0,1,:,:] = (output[0,1,:,:] + 1) * (target_shape[0] - 1) / 2.0
xv, yv = np.meshgrid(np.arange(target_shape[0]), np.arange(target_shape[1]))
@@ -4300,7 +4308,7 @@ def test_grid_generator():
tmp[1] = -1.0 + (np.arange(target_shape[0]*target_shape[1]) // target_shape[1]) * (2.0 / (target_shape[0]-1))
tmp[2] = 1
grad_est = np.dot(out_grad[0].reshape(2,target_shape[0]*target_shape[1]),tmp.T).reshape(1,6)
- assert_almost_equal(exe.grad_dict['affine'], grad_est, rtol=1e-3, atol=1e-5)
+ assert_almost_equal(exe.grad_dict['affine'], grad_est)
# check addto
exe = grid._simple_bind(ctx=default_context(), affine=(1,6), grad_req='add')
grid_grad_npy = np.random.normal(size=exe.grad_dict['affine'].shape)
@@ -4308,7 +4316,7 @@ def test_grid_generator():
exe.arg_dict['affine'][:] = np.array([[1.0, 0, 0, 0, 1.0, 0]])
exe.forward(is_train=True)
exe.backward(mx.nd.array(out_grad))
- assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy, rtol=1e-2, atol=1e-5)
+ assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy)
# transform_type = warp
test_case = [(12,21),(4,3),(6,12)]
@@ -5354,51 +5362,62 @@ def test_div_sqrt_dim():
check_symbolic_forward(test, [data_tmp], [data_tmp / np.sqrt(data_tmp.shape[-1])])
+# helper function to identify inputs likely to fail check_numeric_gradient tol test
+ # due to finite difference method inaccuracies or function discontinuities at the origin
+def bad_input_finder(f, f_grad, dtype):
+ eps = default_numeric_eps()[np.dtype(dtype)]
+ rtol = default_rtols()[np.dtype(dtype)]
+ def expected_relative_error(x):
+ fd_gradient = (f(x+eps/2) - f(x-eps/2)) / eps
+ return abs(fd_gradient/f_grad(x) - 1)
+ def is_fd_problem_input(x):
+ return abs(x) < eps/2 or expected_relative_error(x) > rtol
+ return np.vectorize(is_fd_problem_input)
+
@with_seed()
def test_reciprocal_op():
- eps = 2**(-11)
- data_tmp = np.random.rand(3, 4) * 10 - 5
- # Avoid possible division by 0 errors and finite difference method inaccuracies.
- # Factor of 6 below set empirically, depends on eps.
- # Issue exposed by seed 879579887.
- # Replace problematic inputs with 1.0.
- data_tmp[abs(data_tmp) < 6*eps] = 1.0
+ data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5
+
+ # Avoid possible division by 0 errors and finite difference method
+ # inaccuracies by replacing problem inputs with 1.0.
+ is_bad_input = bad_input_finder(np.reciprocal,
+ lambda x: -np.reciprocal(x)**2, np.float32)
+ data_tmp[is_bad_input(data_tmp)] = 1.0
data = mx.symbol.Variable('data')
test = mx.sym.reciprocal(data)
- check_numeric_gradient(test, [data_tmp], numeric_eps = eps)
+ check_numeric_gradient(test, [data_tmp])
check_symbolic_forward(test, [data_tmp], [np.reciprocal(data_tmp)])
@with_seed()
def test_cbrt_op():
- eps = 2**(-11)
- data_tmp = np.random.rand(3, 4) * 10 - 5
- # Avoid finite difference method inaccuracies due to infinite gradient at the origin.
- # Factor of 4 below set empirically, depends on eps.
- # Issue exposed by seed 553872106.
- # Replace problematic inputs with 1.0.
- data_tmp[abs(data_tmp) < 4*eps] = 1.0
+ data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5
+
+ # Avoid possible division by 0 errors and finite difference method
+ # inaccuracies by replacing problem inputs with 1.0.
+ is_bad_input = bad_input_finder(np.cbrt,
+ lambda x: 1./(3 * np.cbrt(x)**2), np.float32)
+ data_tmp[is_bad_input(data_tmp)] = 1.0
data = mx.symbol.Variable('data')
test = mx.sym.cbrt(data)
-
- check_numeric_gradient(test, [data_tmp], numeric_eps=eps)
+ check_numeric_gradient(test, [data_tmp])
check_symbolic_forward(test, [data_tmp], [np.cbrt(data_tmp)])
@with_seed()
def test_rcbrt_op():
- eps = 2**(-11)
- data_tmp = np.random.rand(3, 4) * 10 - 5
- # Avoid possible division by 0 errors and finite difference method inaccuracies.
- # Factor of 4 below set empirically, depends on eps.
- # Issue exposed by seed 788174893.
- # Replace problematic inputs with 1.0.
- data_tmp[abs(data_tmp) < 4*eps] = 1.0
+ data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5
+
+ # Avoid possible division by 0 errors and finite difference method
+ # inaccuracies by replacing problem inputs with 1.0.
+ is_bad_input = bad_input_finder(lambda x: 1./np.cbrt(x),
+ lambda x: -1./(3 * np.cbrt(x)**4), np.float32)
+ data_tmp[is_bad_input(data_tmp)] = 1.0
data = mx.symbol.Variable('data')
test = mx.sym.rcbrt(data)
- check_numeric_gradient(test, [data_tmp], numeric_eps = eps)
+ check_numeric_gradient(test, [data_tmp])
check_symbolic_forward(test, [data_tmp], [1/np.cbrt(data_tmp)])
@@ -5807,7 +5826,7 @@ def test_deformable_convolution():
# By now we only have gpu implementation
if default_context().device_type == 'gpu':
check_numeric_gradient(op, [im_data, offset_data, weight, bias], rtol=rtol, atol=atol,
- grad_nodes=grad_nodes, ctx=mx.gpu(0))
+ grad_nodes=grad_nodes, ctx=mx.gpu(0), numeric_eps=1.0/64)
def _validate_sample_location(input_rois, input_offset, spatial_scale, pooled_w, pooled_h, sample_per_part, part_size, output_dim, num_classes, trans_std, feat_h, feat_w):
@@ -5900,10 +5919,11 @@ def test_deformable_psroipooling():
grad_nodes=grad_nodes, ctx=mx.gpu(0))
-def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9):
- num_eps = 1e-6
- rtol_bw = 1e-5
- atol_bw = 1e-6
+def _gemm_test_helper(dtype, grad_check, rtol_fw = None, atol_fw = None,
+ rtol_bw = None, atol_bw = None, num_eps = None):
+ def np_random_data(shape, dtype=np.float32):
+ return np.random.uniform(low=-0.5,
+ high=0.5, size=shape).astype(dtype)
data1 = mx.symbol.Variable('data1')
data2 = mx.symbol.Variable('data2')
@@ -5922,10 +5942,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9):
shape2 = (3, 2)
shape3 = (3, 3)
shape4 = (2, 2)
- data_in1 = np.random.uniform(1, 10, shape1).astype(dtype)
- data_in2 = np.random.uniform(1, 10, shape2).astype(dtype)
- data_in3 = np.random.uniform(1, 10, shape3).astype(dtype)
- data_in4 = np.random.uniform(1, 10, shape4).astype(dtype)
+ data_in1 = np_random_data(shape1, dtype)
+ data_in2 = np_random_data(shape2, dtype)
+ data_in3 = np_random_data(shape3, dtype)
+ data_in4 = np_random_data(shape4, dtype)
# Check all transpositions of gemm operator.
data_in1_t = np.transpose(data_in1)
data_in2_t = np.transpose(data_in2)
@@ -6032,10 +6052,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9):
def test_gemm():
_gemm_test_helper(np.float64, True)
os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0"
- _gemm_test_helper(np.float32, False, rtol_fw = 1e-5, atol_fw = 1e-7)
+ _gemm_test_helper(np.float32, True)
if default_context().device_type == 'gpu':
os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "1"
- _gemm_test_helper(np.float32, False, rtol_fw = 2e-5, atol_fw = 2e-7)
+ _gemm_test_helper(np.float32, True)
os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0"
# Helper functions for test_laop
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index dc07201..8bc086e 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -1598,6 +1598,7 @@ def test_sparse_axis_operations():
@with_seed()
+@pytest.mark.serial
def test_sparse_square_sum():
dim0 = 30
dim1 = 30