You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/10/22 19:21:12 UTC
[GitHub] vishaalkapoor commented on issue #12903: Failing Straight Dope test
- test_pixel2pixel (test_notebooks_single_gpu.StraightDopeSingleGpuTests)
vishaalkapoor commented on issue #12903: Failing Straight Dope test - test_pixel2pixel (test_notebooks_single_gpu.StraightDopeSingleGpuTests)
URL: https://github.com/apache/incubator-mxnet/issues/12903#issuecomment-431869052
There haven't been any changes to the Straight Dope book in the last week at least: https://github.com/zackchase/mxnet-the-straight-dope/commits/master
It's possible the notebook is flaky, or an error was introduced to MXNet. The notebook is here: https://gluon.mxnet.io/chapter14_generative-adversarial-networks/pixel2pixel.html.
Here's the error from http://jenkins.mxnet-ci.amazon-ml.com/job/NightlyTestsForBinaries/job/master/111/console
```
[StraightDope: Python2 Single-GPU] ERROR:root:An error occurred while executing the following cell:
[StraightDope: Python2 Single-GPU] ------------------
[StraightDope: Python2 Single-GPU] datasets = ['cityscapes', 'maps']
[StraightDope: Python2 Single-GPU] is_reversed = False
[StraightDope: Python2 Single-GPU] batch_size = 64
[StraightDope: Python2 Single-GPU]
[StraightDope: Python2 Single-GPU] for dataset in datasets:
[StraightDope: Python2 Single-GPU] train_img_path = '%s/train' % (dataset)
[StraightDope: Python2 Single-GPU] val_img_path = '%s/val' % (dataset)
[StraightDope: Python2 Single-GPU] download_data(dataset)
[StraightDope: Python2 Single-GPU] train_data = load_data(train_img_path, batch_size, is_reversed=is_reversed)
[StraightDope: Python2 Single-GPU] val_data = load_data(val_img_path, batch_size, is_reversed=is_reversed)
[StraightDope: Python2 Single-GPU]
[StraightDope: Python2 Single-GPU] print("Preview %s training data:" % (dataset))
[StraightDope: Python2 Single-GPU] preview_train_data()
[StraightDope: Python2 Single-GPU]
[StraightDope: Python2 Single-GPU] netG, netD, trainerG, trainerD = set_network()
[StraightDope: Python2 Single-GPU] train()
[StraightDope: Python2 Single-GPU]
[StraightDope: Python2 Single-GPU] print("Training result for %s" % (dataset))
[StraightDope: Python2 Single-GPU] print_result()
[StraightDope: Python2 Single-GPU]
```
```
MXNetErrorTraceback (most recent call last)
<ipython-input-11-8977a6d8e84f> in <module>()
14
15 netG, netD, trainerG, trainerD = set_network()
---> 16 train()
17
18 print("Training result for %s" % (dataset))
<ipython-input-9-8b406c4588fa> in train()
35 fake_label = nd.zeros(output.shape, ctx=ctx)
36 errD_fake = GAN_loss(output, fake_label)
---> 37 metric.update([fake_label,], [output,])
38
39 # Train with real image
/work/mxnet/python/mxnet/metric.pyc in update(self, labels, preds)
1374 for pred, label in zip(preds, labels):
1375 label = label.asnumpy()
-> 1376 pred = pred.asnumpy()
1377
1378 reval = self._feval(label, pred)
/work/mxnet/python/mxnet/ndarray/ndarray.pyc in asnumpy(self)
1978 self.handle,
1979 data.ctypes.data_as(ctypes.c_void_p),
-> 1980 ctypes.c_size_t(data.size)))
1981 return data
1982
/work/mxnet/python/mxnet/base.pyc in check_call(ret)
250 """
251 if ret != 0:
--> 252 raise MXNetError(py_str(_LIB.MXGetLastError()))
253
254
MXNetError: [20:48:26] /work/mxnet/3rdparty/mshadow/mshadow/./stream_gpu-inl.h:62: Check failed: e == cudaSuccess CUDA: an illegal memory access was encountered
Stack trace returned 10 entries:
(0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTracebi:cxx11]()+0x1c7) 7f64d6e64bb7]
(1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) 7f64d6e65062]
(2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mshadow::Stream<mshadow::gpu>::Wait()+0x120) 7f64da24e8c0]
(3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x42c) 7f64da2abe4c]
(4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) 7f64da2ac03e]
(5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
MXNetError: [20:48:26] /work/mxnet/3rdparty/mshadow/mshadow/./stream_gpu-inl.h:62: Check failed: e == cudaSuccess CUDA: an illegal memory access was encountered
Stack trace returned 10 entries:
(0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTracebi:cxx11]()+0x1c7) 7f64d6e64bb7]
(1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) 7f64d6e65062]
(2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mshadow::Stream<mshadow::gpu>::Wait()+0x120) 7f64da24e8c0]
(3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x42c) 7f64da2abe4c]
(4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) 7f64da2ac03e]
(5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
(9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
```
Vishaal
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services