You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/04/20 16:36:32 UTC
[GitHub] [incubator-mxnet] szha commented on issue #18100: curand error in tests
szha commented on issue #18100:
URL: https://github.com/apache/incubator-mxnet/issues/18100#issuecomment-616669873
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/mxnet-validation%2Funix-gpu/detail/PR-18025/36/pipeline
```
=================================== FAILURES ===================================
______________________________ test_np_histogram _______________________________
@with_seed()
@use_np
def test_np_histogram():
shapes = [(), (3, 4), (3, 0)]
for shape in shapes:
mx_a = np.random.uniform(0.0, 10.0, size=shape)
> np_a = mx_a.asnumpy()
tests/python/unittest/test_numpy_op.py:4494:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/mxnet/ndarray/ndarray.py:2566: in asnumpy
ctypes.c_size_t(data.size)))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ret = -1
def check_call(ret):
"""Check the return value of C API call.
This function will raise an exception when an error occurs.
Wrap every API call with this function.
Parameters
----------
ret : int
return value from API calls.
"""
if ret != 0:
> raise get_last_ffi_error()
E mxnet.base.MXNetError: Traceback (most recent call last):
E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da]
E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4]
E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64]
E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf]
E File "/work/mxnet/include/mshadow/./random.h", line 396
E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (102 vs. 0) : CURAND Gen Uniform float failed. size = 1
python/mxnet/base.py:246: MXNetError
________________________________ test_np_choice ________________________________
@with_seed()
@use_np
def test_np_choice():
class TestUniformChoice(HybridBlock):
def __init__(self, sample_size, replace):
super(TestUniformChoice, self).__init__()
self.sample_size = sample_size
self.replace = replace
def hybrid_forward(self, F, a):
return F.np.random.choice(a=a, size=self.sample_size, replace=self.replace, p=None)
class TestWeightedChoice(HybridBlock):
def __init__(self, sample_size, replace):
super(TestWeightedChoice, self).__init__()
self.sample_size = sample_size
self.replace = replace
def hybrid_forward(self, F, a, p):
op = getattr(F.np.random, "choice", None)
return F.np.random.choice(a, self.sample_size, self.replace, p)
def test_sample_with_replacement(sampler, num_classes, shape, weight=None):
samples = sampler(num_classes, shape, replace=True, p=weight).asnumpy()
generated_density = _np.histogram(samples, _np.arange(num_classes + 1), density=True)[0]
expected_density = (weight.asnumpy() if weight is not None else
_np.array([1 / num_classes] * num_classes))
# test almost equal
assert_almost_equal(generated_density, expected_density, rtol=1e-1, atol=1e-1)
# test shape
assert (samples.shape == shape)
def test_sample_without_replacement(sampler, num_classes, shape, num_trials, weight=None):
samples = sampler(num_classes, shape, replace=False, p=weight).asnumpy()
# Check shape and uniqueness
assert samples.shape == shape
assert len(_np.unique(samples)) == samples.size
# Check distribution
bins = _np.zeros((num_classes))
expected_freq = (weight.asnumpy() if weight is not None else
_np.array([1 / num_classes] * num_classes))
for i in range(num_trials):
out = sampler(num_classes, 1, replace=False, p=weight).item()
bins[out] += 1
bins /= num_trials
assert_almost_equal(bins, expected_freq, rtol=1e-1, atol=1e-1)
def test_indexing_mode(sampler, set_size, samples_size, replace, weight=None):
a = np.arange(set_size)
if weight is not None:
samples = sampler(a, weight)
else:
samples = sampler(a)
assert len(samples) == samples_size
if not replace:
assert len(_np.unique(samples.asnumpy())) == samples_size
num_classes = 10
num_samples = 10 ** 8
# Density tests are commented out due to their huge time comsumption.
# Tests passed locally.
# shape_list1 = [
# (10 ** 8, 1),
# (10 ** 5, 10 ** 3),
# (10 ** 2, 10 ** 3, 10 ** 3)
# ]
# for shape in shape_list1:
# test_sample_with_replacement(np.random.choice, num_classes, shape)
# weight = np.array(_np.random.dirichlet([1.0] * num_classes))
# test_sample_with_replacement(np.random.choice, num_classes, shape, weight)
# Tests passed locally,
# commented out for the same reason as above.
# shape_list2 = [
# (6, 1),
# (2, 3),
# (1, 2, 3),
# (2, 2),
# ]
# for shape in shape_list2:
# test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5)
# weight = np.array(_np.random.dirichlet([1.0] * num_classes))
# test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5, weight)
# Test hypridize mode:
for wtype in ['float16', 'float32', 'float64']:
for hybridize in [True, False]:
for replace in [True, False]:
test_choice = TestUniformChoice(num_classes // 2, replace)
test_choice_weighted = TestWeightedChoice(num_classes // 2, replace)
if hybridize:
test_choice.hybridize()
test_choice_weighted.hybridize()
weight = np.array(_np.random.dirichlet([1.0] * num_classes)).astype(wtype)
> test_indexing_mode(test_choice, num_classes, num_classes // 2, replace, None)
tests/python/unittest/test_numpy_op.py:4598:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/python/unittest/test_numpy_op.py:4559: in test_indexing_mode
assert len(_np.unique(samples.asnumpy())) == samples_size
python/mxnet/ndarray/ndarray.py:2566: in asnumpy
ctypes.c_size_t(data.size)))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ret = -1
def check_call(ret):
"""Check the return value of C API call.
This function will raise an exception when an error occurs.
Wrap every API call with this function.
Parameters
----------
ret : int
return value from API calls.
"""
if ret != 0:
> raise get_last_ffi_error()
E mxnet.base.MXNetError: Traceback (most recent call last):
E [bt] (9) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd53f) [0x7f24564f253f]
E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da]
E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&)+0xba) [0x7f23de997c2a]
E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4]
E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyChoiceForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x16e9) [0x7f23e47986e9]
E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf]
E [08:59:51] /work/mxnet/include/mshadow/./random.h:321: Check failed: (status) == (CURAND_STATUS_SUCCESS) CURAND Gen rand ints failed.
python/mxnet/base.py:246: MXNetError
______________________________ test_np_full_like _______________________________
@with_seed()
@use_np
def test_np_full_like():
class TestFullLike(HybridBlock):
def __init__(self, fill_value, dtype, ctx):
super(TestFullLike, self).__init__()
self._fill_value = fill_value
self._dtype = dtype
self._ctx = ctx
def hybrid_forward(self, F, x, *args, **kwargs):
return F.np.full_like(x, self._fill_value, dtype=self._dtype, ctx=self._ctx)
if StrictVersion(platform.python_version()) < StrictVersion('3.0.0'):
return
dtypes = ['float64', 'float32', 'float16', 'int64', 'int32', 'int8', 'bool']
shapes = [
(),
(1,),
(4, 3),
(4, 5),
(2, 1),
(6, 5, 6),
(4, 2, 1, 2),
(5, 1, 3, 3),
(3, 3, 1, 0),
]
# numpy.full_like operator in py2 cannot handle shape like (5, 0, 3) properly
fill_values = [0, 1, 2, 3, 4, 5, 6, True, False]
flags = [True, False]
for fill_value, dtype, shape, hybridize in itertools.product(
fill_values, dtypes, shapes, flags):
param_dtype = _np.random.choice(dtypes)
a = np.random.uniform(low=0, high=100, size=shape, dtype='float64').astype(dtype)
test = TestFullLike(fill_value, param_dtype, npx.current_context())
> expected_ret = _np.full_like(a.asnumpy(), fill_value=fill_value, dtype=param_dtype)
tests/python/unittest/test_numpy_op.py:6479:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/mxnet/ndarray/ndarray.py:2566: in asnumpy
ctypes.c_size_t(data.size)))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ret = -1
def check_call(ret):
"""Check the return value of C API call.
This function will raise an exception when an error occurs.
Wrap every API call with this function.
Parameters
----------
ret : int
return value from API calls.
"""
if ret != 0:
> raise get_last_ffi_error()
E mxnet.base.MXNetError: Traceback (most recent call last):
E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da]
E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4]
E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64]
E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf]
E File "/work/mxnet/include/mshadow/./random.h", line 396
E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 1
python/mxnet/base.py:246: MXNetError
_______________________________ test_np_diagflat _______________________________
@with_seed()
@use_np
def test_np_diagflat():
class TestDiagflat(HybridBlock):
def __init__(self, k=0):
super(TestDiagflat,self).__init__()
self._k = k
def hybrid_forward(self,F,a):
return F.np.diagflat(a, k=self._k)
shapes = [(2,),5 , (1,5), (2,2), (2,5), (3,3), (4,3),(4,4,5)] # test_shapes, remember to include zero-dim shape and zero-size shapes
dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64] # remember to include all meaningful data types for the operator
range_k = 6
for hybridize,shape,dtype, in itertools.product([False,True],shapes,dtypes):
rtol = 1e-2 if dtype == np.float16 else 1e-3
atol = 1e-4 if dtype == np.float16 else 1e-5
for k in range(-range_k,range_k):
test_diagflat = TestDiagflat(k)
if hybridize:
test_diagflat.hybridize()
x = np.random.uniform(-1.0,1.0, size = shape).astype(dtype)
x.attach_grad()
> np_out = _np.diagflat(x.asnumpy(), k)
tests/python/unittest/test_numpy_op.py:7464:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/mxnet/ndarray/ndarray.py:2566: in asnumpy
ctypes.c_size_t(data.size)))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ret = -1
def check_call(ret):
"""Check the return value of C API call.
This function will raise an exception when an error occurs.
Wrap every API call with this function.
Parameters
----------
ret : int
return value from API calls.
"""
if ret != 0:
> raise get_last_ffi_error()
E mxnet.base.MXNetError: Traceback (most recent call last):
E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da]
E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4]
E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64]
E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf]
E File "/work/mxnet/include/mshadow/./random.h", line 396
E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 2
python/mxnet/base.py:246: MXNetError
________________________________ test_batchnorm ________________________________
@with_seed()
def test_batchnorm():
momentum = 0.9
epsilon = 1e-5
def _test_batchnorm_impl(op, shape, axis, cudnn_off, output_mean_var):
print(str((op, shape, axis, cudnn_off)))
kwargs = dict(output_mean_var=output_mean_var)
if op == mx.nd.contrib.SyncBatchNorm:
if axis != 1:
return
key = str(op) + str(shape) + str(axis)
kwargs.update(dict(key=key))
if cudnn_off:
return
else:
kwargs.update(dict(axis=axis, cudnn_off=cudnn_off))
nch = shape[axis]
bn_gamma = mx.nd.random.uniform(shape=(nch,))
bn_gamma.attach_grad()
bn_beta = mx.nd.random.uniform(shape=(nch,))
bn_beta.attach_grad()
bn_running_mean = mx.nd.zeros(nch)
bn_running_var = mx.nd.ones(nch)
running_mean = mx.nd.zeros(nch)
running_var = mx.nd.ones(nch)
num_iters = 10
expand_shape = [1] * len(shape)
expand_shape[axis] = shape[axis]
for _ in range(num_iters):
data = mx.nd.random.uniform(shape=shape)
data.attach_grad()
ograd = mx.nd.random.uniform(shape=shape)
with mx.autograd.record():
output = op(data, bn_gamma, bn_beta,
bn_running_mean, bn_running_var,
momentum=momentum, eps=epsilon,
fix_gamma=False, **kwargs)
if output_mean_var:
output, output_mean, output_std = output
output.backward(ograd)
mx.nd.waitall()
data_mean = data.mean(
axis=axis, exclude=True, keepdims=True)
data_var = (data - data_mean).square().mean(axis=axis,
exclude=True,
keepdims=True)
target_output = (data - data_mean) / \
(data_var + epsilon).sqrt() * \
bn_gamma.reshape(expand_shape) + \
bn_beta.reshape(expand_shape)
# squeeze data_mean and data_var
data_mean_flat = data_mean.squeeze()
data_var_flat = data_var.squeeze()
running_mean = running_mean * momentum + \
data_mean_flat * (1 - momentum)
running_var = running_var * momentum + \
data_var_flat * (1 - momentum)
W = bn_gamma.reshape(expand_shape)
dnx = ograd * W
xsm = data - data_mean
nd = 1.0 / mx.nd.sqrt(data_var + epsilon)
nx = xsm * nd
m = np.prod(shape) / shape[axis]
dvar = (dnx * xsm).sum(axis=axis, keepdims=True,
exclude=True) * (-0.5) * mx.nd.power(nd, 3)
dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \
dvar * xsm.mean(axis=axis, keepdims=True,
exclude=True) * 2.0
dX = dnx * nd + dvar * xsm * (2.0 / m) + dmean * (1.0 / m)
dW = (ograd * nx).sum(axis=axis, exclude=True)
db = ograd.sum(axis=axis, exclude=True)
atol = 1e-2
rtol = 1e-2
if output_mean_var:
assert_almost_equal(output_mean.asnumpy(),
data_mean_flat.asnumpy(),
atol=atol, rtol=rtol)
if op != mx.nd.contrib.SyncBatchNorm:
assert_almost_equal(output_std.asnumpy(),
(1.0 / (data_var_flat +
epsilon).sqrt()).asnumpy(),
atol=atol, rtol=rtol)
else:
assert_almost_equal(output_std.asnumpy(),
data_var_flat.asnumpy(),
atol=atol, rtol=rtol)
assert_almost_equal(output.asnumpy(), target_output.asnumpy(),
atol=atol, rtol=rtol)
assert_almost_equal(bn_running_mean.asnumpy(
), running_mean.asnumpy(), atol=atol, rtol=rtol)
assert_almost_equal(bn_running_var.asnumpy(
), running_var.asnumpy(), atol=atol, rtol=rtol)
assert_almost_equal(data.grad.asnumpy(),
dX.asnumpy(), atol=atol, rtol=rtol)
assert_almost_equal(
bn_gamma.grad.asnumpy(), dW.asnumpy(), atol=atol, rtol=rtol)
assert_almost_equal(
bn_beta.grad.asnumpy(), db.asnumpy(), atol=atol, rtol=rtol)
for op in [mx.nd.BatchNorm, mx.nd.contrib.SyncBatchNorm]:
for shape in [(24, 2), (24, 3, 4), (24, 4, 4, 4), (24, 8, 4, 4), (24, 5, 6, 4, 4)]:
for axis in range(len(shape)):
for cudnn_off in [False, True]:
for output_mean_var in [False, True]:
_test_batchnorm_impl(op, shape, axis,
> cudnn_off, output_mean_var)
tests/python/unittest/test_operator.py:1943:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/python/unittest/test_operator.py:1870: in _test_batchnorm_impl
mx.nd.waitall()
python/mxnet/ndarray/ndarray.py:211: in waitall
check_call(_LIB.MXNDArrayWaitAll())
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ret = -1
def check_call(ret):
"""Check the return value of C API call.
This function will raise an exception when an error occurs.
Wrap every API call with this function.
Parameters
----------
ret : int
return value from API calls.
"""
if ret != 0:
> raise get_last_ffi_error()
E mxnet.base.MXNetError: Traceback (most recent call last):
E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da]
E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4]
E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyNormalForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xdf8) [0x7f23e49d5708]
E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenGaussian(float*, unsigned long, float, float)+0x15e) [0x7f23e1b271fe]
E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf]
E File "/work/mxnet/include/mshadow/./random.h", line 380
E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Normal float failed. size = 0,mu = 0,sigma = 1
python/mxnet/base.py:246: MXNetError
----------------------------- Captured stdout call -----------------------------
(<function BatchNorm at 0x7f22dab688c8>, (24, 2), 0, False)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org