You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/10/02 09:21:15 UTC
[GitHub] marcoabreu opened a new issue #12715: Failing test: test_gluon_gpu.test_slice_batchnorm
marcoabreu opened a new issue #12715: Failing test: test_gluon_gpu.test_slice_batchnorm
URL: https://github.com/apache/incubator-mxnet/issues/12715
This test is failing almost consistently: the three consecutive master builds linked below all failed.
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1696/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1697/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1698/pipeline/
```
======================================================================
ERROR: test_gluon_gpu.test_slice_batchnorm
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in runTest
self.test(*self.arg)
File "/usr/local/lib/python3.5/dist-packages/nose/util.py", line 620, in newfunc
return func(*arg, **kw)
File "/work/mxnet/tests/python/gpu/../unittest/common.py", line 172, in test_new
orig_test(*args, **kwargs)
File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1954, in test_slice_batchnorm
check_layer_forward_withinput(net, x)
File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1508, in check_layer_forward_withinput
mx.test_utils.assert_almost_equal(x.grad.asnumpy(), x_hybrid.grad.asnumpy(), rtol=1e-5, atol=1e-6)
File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1980, in asnumpy
ctypes.c_size_t(data.size)))
File "/work/mxnet/python/mxnet/base.py", line 253, in check_call
raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [21:40:19] src/operator/nn/./cudnn/cudnn_convolution-inl.h:870: Failed to find any forward convolution algorithm. with workspace size of 1073741824 bytes, please consider reducing batch/model size or increasing the workspace size
Stack trace returned 10 entries:
[bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7) [0x7f2a6dd0a9e7]
[bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7f2a6dd0ae92]
[bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo(mxnet::RunContext const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, cudnnDataType_t, cudnnDataType_t)+0xea6) [0x7f2a740b2856]
[bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::Init(mxnet::op::ConvolutionParam const&, int, int, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, mxnet::RunContext const&, bool)+0xac3) [0x7f2a740b5fb3]
[bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x805b3b3) [0x7f2a7405d3b3]
[bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::ConvolutionCompute<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xfac) [0x7f2a7405e64c]
[bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&), void (*)(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x20) [0x7f2a6dd051c0]
[bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x2cd) [0x7f2a70fb603d]
[bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) [0x7f2a70fb638e]
[bt] (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x576686b) [0x7f2a7176886b]
-------------------- >> begin captured logging << --------------------
common: INFO: Setting test np/mx/python random seeds, use MXNET_TEST_SEED=560052399 to reproduce.
--------------------- >> end captured logging << ---------------------
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services