You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/10/02 09:21:15 UTC

[GitHub] marcoabreu opened a new issue #12715: Failing test: test_gluon_gpu.test_slice_batchnorm

marcoabreu opened a new issue #12715: Failing test: test_gluon_gpu.test_slice_batchnorm
URL: https://github.com/apache/incubator-mxnet/issues/12715
 
 
   This test is failing almost consistently: the three consecutive runs on master linked below (builds 1696, 1697, and 1698) all failed.
   
   http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1696/pipeline/
   http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1697/pipeline/
   http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1698/pipeline/
   ```
   ======================================================================
   
   ERROR: test_gluon_gpu.test_slice_batchnorm
   
   ----------------------------------------------------------------------
   
   Traceback (most recent call last):
   
     File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in runTest
   
       self.test(*self.arg)
   
     File "/usr/local/lib/python3.5/dist-packages/nose/util.py", line 620, in newfunc
   
       return func(*arg, **kw)
   
     File "/work/mxnet/tests/python/gpu/../unittest/common.py", line 172, in test_new
   
       orig_test(*args, **kwargs)
   
     File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1954, in test_slice_batchnorm
   
       check_layer_forward_withinput(net, x)
   
     File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1508, in check_layer_forward_withinput
   
       mx.test_utils.assert_almost_equal(x.grad.asnumpy(), x_hybrid.grad.asnumpy(), rtol=1e-5, atol=1e-6)
   
     File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1980, in asnumpy
   
       ctypes.c_size_t(data.size)))
   
     File "/work/mxnet/python/mxnet/base.py", line 253, in check_call
   
       raise MXNetError(py_str(_LIB.MXGetLastError()))
   
   mxnet.base.MXNetError: [21:40:19] src/operator/nn/./cudnn/cudnn_convolution-inl.h:870: Failed to find any forward convolution algorithm.  with workspace size of 1073741824 bytes, please consider reducing batch/model size or increasing the workspace size
   
   
   
   Stack trace returned 10 entries:
   
   [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7) [0x7f2a6dd0a9e7]
   
   [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7f2a6dd0ae92]
   
   [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo(mxnet::RunContext const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, cudnnDataType_t, cudnnDataType_t)+0xea6) [0x7f2a740b2856]
   
   [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::Init(mxnet::op::ConvolutionParam const&, int, int, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, mxnet::RunContext const&, bool)+0xac3) [0x7f2a740b5fb3]
   
   [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x805b3b3) [0x7f2a7405d3b3]
   
   [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::ConvolutionCompute<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xfac) [0x7f2a7405e64c]
   
   [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&), void (*)(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x20) [0x7f2a6dd051c0]
   
   [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x2cd) [0x7f2a70fb603d]
   
   [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) [0x7f2a70fb638e]
   
   [bt] (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x576686b) [0x7f2a7176886b]
   
   
   
   
   
   -------------------- >> begin captured logging << --------------------
   
   common: INFO: Setting test np/mx/python random seeds, use MXNET_TEST_SEED=560052399 to reproduce.
   
   --------------------- >> end captured logging << ---------------------
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services