Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/03/29 21:39:30 UTC

[GitHub] marcoabreu opened a new issue #10322: test_kvstore_gpu.test_rsp_push_pull out of memory

URL: https://github.com/apache/incubator-mxnet/issues/10322
 
 
   ```
    ======================================================================
    ERROR: test_kvstore_gpu.test_rsp_push_pull
    ----------------------------------------------------------------------
    Traceback (most recent call last):
      File "/usr/lib/python3.6/site-packages/nose/case.py", line 198, in runTest
        self.test(*self.arg)
      File "/work/mxnet/tests/python/gpu/../unittest/common.py", line 157, in test_new
        orig_test(*args, **kwargs)
      File "/work/mxnet/tests/python/gpu/test_kvstore_gpu.py", line 91, in test_rsp_push_pull
        check_rsp_push_pull('device')
      File "/work/mxnet/tests/python/gpu/test_kvstore_gpu.py", line 82, in check_rsp_push_pull
        check_rsp_pull(kv, 4, [mx.gpu(i//2) for i in range(4)])
      File "/work/mxnet/tests/python/gpu/test_kvstore_gpu.py", line 73, in check_rsp_pull
        retained = val.asnumpy()
      File "/work/mxnet/python/mxnet/ndarray/sparse.py", line 180, in asnumpy
        return self.tostype('default').asnumpy()
      File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1826, in asnumpy
        ctypes.c_size_t(data.size)))
      File "/work/mxnet/python/mxnet/base.py", line 149, in check_call
        raise MXNetError(py_str(_LIB.MXGetLastError()))
    mxnet.base.MXNetError: [17:02:41] src/operator/tensor/./../mxnet_op.h:576: Check failed: err == cudaSuccess (2 vs. 0) Name: mxnet_generic_kernel ErrStr:out of memory

    Stack trace returned 10 entries:
    [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace()+0x42) [0x7f260c0384e2]
    [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x18) [0x7f260c038a88]
    [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::mxnet_op::Kernel<mxnet::op::mxnet_op::set_to_int<0>, mshadow::gpu>::Launch<float*>(mshadow::Stream<mshadow::gpu>*, int, float*)+0x150) [0x7f260f4f62d0]
    [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::CastStorageRspDnsImpl<mshadow::gpu>(mxnet::OpContext const&, mxnet::NDArray const&, mxnet::TBlob*)+0x2650) [0x7f260f5cd5d0]
    [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::CastStorageComputeImpl<mshadow::gpu>(mxnet::OpContext const&, mxnet::NDArray const&, mxnet::NDArray const&)+0x338) [0x7f260f5d48b8]
    [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::op::CastStorageComputeEx<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)+0x3a3) [0x7f260f5d5713]
    [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFComputeEx(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext)+0x7b) [0x7f260e78e62b]
    [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x381e603) [0x7f260ec52603]
    [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x572) [0x7f260ec4de02]
    [bt] (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0xdb) [0x7f260ec5cbdb]

    -------------------- >> begin captured logging << --------------------
    common: INFO: Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=191628979 to reproduce.
    common: INFO: Setting test np/mx/python random seeds, use MXNET_TEST_SEED=728061287 to reproduce.
    --------------------- >> end captured logging << ---------------------
   ```
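    
    Based on the traceback, the failure path is: check_rsp_pull pulls a row_sparse value onto four GPU contexts (two per GPU), and converting the pulled value with asnumpy() goes through tostype('default'), i.e. a row_sparse-to-dense cast on the device (CastStorageRspDnsImpl), where the dense allocation runs out of GPU memory. A minimal sketch of that path is below; the shape, key name, and row ids are illustrative assumptions, not values taken from test_kvstore_gpu.py.
    
    ```python
    # Illustrative sketch only -- not the actual test code. The shape, key, and
    # row ids are assumptions; the kvstore type 'device' and the context list
    # mirror the traceback above.
    import mxnet as mx
    
    shape = (4, 1000000)                       # assumed size, large enough to matter
    kv = mx.kv.create('device')                # check_rsp_push_pull('device') in the log
    kv.init('a', mx.nd.zeros(shape).tostype('row_sparse'))
    
    ctxs = [mx.gpu(i // 2) for i in range(4)]  # same context list as the traceback
    outs = [mx.nd.sparse.zeros('row_sparse', shape, ctx=c) for c in ctxs]
    for out in outs:
        kv.row_sparse_pull('a', out=out, row_ids=mx.nd.array([0, 1, 2, 3]))
    
    # RowSparseNDArray.asnumpy() calls self.tostype('default').asnumpy(); the
    # row_sparse -> dense cast allocates a dense buffer on the GPU, which is
    # where the "out of memory" check fires in the stack trace.
    for out in outs:
        retained = out.asnumpy()
    ```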
   
   http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/PR-10313/2/pipeline/594
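    
    The captured logging above gives the seeds needed to replay the exact run. A hedged local-reproduction sketch (assuming an incubator-mxnet checkout and the nose runner shown in the traceback; paths may differ on other setups) is:
    
    ```python
    # Re-run the failing test with the seeds from the captured logging above.
    # The test file path assumes an incubator-mxnet source checkout.
    import os
    import subprocess
    
    env = dict(os.environ,
               MXNET_MODULE_SEED="191628979",  # module seed from the log
               MXNET_TEST_SEED="728061287")    # test seed from the log
    subprocess.run(
        ["nosetests", "-v",
         "tests/python/gpu/test_kvstore_gpu.py:test_rsp_push_pull"],
        env=env, check=True)
    ```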

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services