You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/10/22 19:21:12 UTC

[GitHub] vishaalkapoor commented on issue #12903: Failing Straight Dope test - test_pixel2pixel (test_notebooks_single_gpu.StraightDopeSingleGpuTests)

vishaalkapoor commented on issue #12903: Failing Straight Dope test - test_pixel2pixel (test_notebooks_single_gpu.StraightDopeSingleGpuTests)
URL: https://github.com/apache/incubator-mxnet/issues/12903#issuecomment-431869052
 
 
   There have been no changes to the Straight Dope book for at least the last week: https://github.com/zackchase/mxnet-the-straight-dope/commits/master
   
   It's possible that the notebook is flaky, or that a regression was introduced in MXNet. The notebook is here: https://gluon.mxnet.io/chapter14_generative-adversarial-networks/pixel2pixel.html
   
   Here's the error from http://jenkins.mxnet-ci.amazon-ml.com/job/NightlyTestsForBinaries/job/master/111/console:
   
   
   ```
   [StraightDope: Python2 Single-GPU] ERROR:root:An error occurred while executing the following cell:
   [StraightDope: Python2 Single-GPU] ------------------
   [StraightDope: Python2 Single-GPU] datasets = ['cityscapes', 'maps']
   [StraightDope: Python2 Single-GPU] is_reversed = False
   [StraightDope: Python2 Single-GPU] batch_size = 64
   [StraightDope: Python2 Single-GPU] 
   [StraightDope: Python2 Single-GPU] for dataset in datasets:
   [StraightDope: Python2 Single-GPU]     train_img_path = '%s/train' % (dataset)
   [StraightDope: Python2 Single-GPU]     val_img_path = '%s/val' % (dataset)
   [StraightDope: Python2 Single-GPU]     download_data(dataset)
   [StraightDope: Python2 Single-GPU]     train_data = load_data(train_img_path, batch_size, is_reversed=is_reversed)
   [StraightDope: Python2 Single-GPU]     val_data = load_data(val_img_path, batch_size, is_reversed=is_reversed)
   [StraightDope: Python2 Single-GPU] 
   [StraightDope: Python2 Single-GPU]     print("Preview %s training data:" % (dataset))
   [StraightDope: Python2 Single-GPU]     preview_train_data()
   [StraightDope: Python2 Single-GPU] 
   [StraightDope: Python2 Single-GPU]     netG, netD, trainerG, trainerD = set_network()
   [StraightDope: Python2 Single-GPU]     train()
   [StraightDope: Python2 Single-GPU]     
   [StraightDope: Python2 Single-GPU]     print("Training result for %s" % (dataset))
   [StraightDope: Python2 Single-GPU]     print_result()
   [StraightDope: Python2 Single-GPU]     
   ```
   ```
    
    MXNetError                                Traceback (most recent call last)
    <ipython-input-11-8977a6d8e84f> in <module>()
         14 
         15     netG, netD, trainerG, trainerD = set_network()
    ---> 16     train()
         17 
         18     print("Training result for %s" % (dataset))
    
    <ipython-input-9-8b406c4588fa> in train()
         35                 fake_label = nd.zeros(output.shape, ctx=ctx)
         36                 errD_fake = GAN_loss(output, fake_label)
    ---> 37                 metric.update([fake_label,], [output,])
         38 
         39                 # Train with real image
    
    /work/mxnet/python/mxnet/metric.pyc in update(self, labels, preds)
       1374         for pred, label in zip(preds, labels):
       1375             label = label.asnumpy()
    -> 1376             pred = pred.asnumpy()
       1377 
       1378             reval = self._feval(label, pred)
    
    /work/mxnet/python/mxnet/ndarray/ndarray.pyc in asnumpy(self)
       1978             self.handle,
       1979             data.ctypes.data_as(ctypes.c_void_p),
    -> 1980             ctypes.c_size_t(data.size)))
       1981         return data
       1982 
    
    /work/mxnet/python/mxnet/base.pyc in check_call(ret)
        250     """
        251     if ret != 0:
    --> 252         raise MXNetError(py_str(_LIB.MXGetLastError()))
        253 
        254 
    
    MXNetError: [20:48:26] /work/mxnet/3rdparty/mshadow/mshadow/./stream_gpu-inl.h:62: Check failed: e == cudaSuccess CUDA: an illegal memory access was encountered
    
    Stack trace returned 10 entries:
    (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7) 7f64d6e64bb7]
    (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) 7f64d6e65062]
    (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mshadow::Stream<mshadow::gpu>::Wait()+0x120) 7f64da24e8c0]
    (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x42c) 7f64da2abe4c]
    (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) 7f64da2ac03e]
    (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    
    
    MXNetError: [20:48:26] /work/mxnet/3rdparty/mshadow/mshadow/./stream_gpu-inl.h:62: Check failed: e == cudaSuccess CUDA: an illegal memory access was encountered
    
    Stack trace returned 10 entries:
    (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7) 7f64d6e64bb7]
    (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) 7f64d6e65062]
    (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mshadow::Stream<mshadow::gpu>::Wait()+0x120) 7f64da24e8c0]
    (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x42c) 7f64da2abe4c]
    (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x1e) 7f64da2ac03e]
    (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
    (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::BulkAppend(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x57) 7f64daaa7017]
   
   ```
   
   Vishaal

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services