You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/11/23 08:47:14 UTC

[GitHub] muhyun opened a new issue #13380: mxnet bind fails in nvidia jetson tx2 (jetpack 3.3, cuda 9.0)

muhyun opened a new issue #13380: mxnet bind fails in nvidia jetson tx2 (jetpack 3.3, cuda 9.0)
URL: https://github.com/apache/incubator-mxnet/issues/13380
 
 
   Python code that loads an MXNet model fails in the bind() function on an Nvidia Jetson TX2 (JetPack 3.3), where MXNet is installed using AWS Greengrass's pre-built binary. The model is a ResNet-50-based transfer-learning model.
   
   - nvidia jetson tx2 with jetpack 3.3
   - ubuntu 16.04
   - python 2.7
   - mxnet 1.2.1
   
   ```
   sym, arg_params, aux_params = mx.model.load_checkpoint(model_dir + 'image-classification', 20)
       mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
       mod.bind(for_training=False, data_shapes=[('data', (1,3,128,128))], label_shapes=mod._label_shapes)
       mod.set_params(arg_params, aux_params, allow_missing=True)
       print('Model loaded successfully')
   ```
   
   The error message is:
   
   `[2018-11-22T10:21:41.559Z][FATAL]-[10:21:41] src/storage/storage.cc:119: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading CUDA: no CUDA-capable device is detected`
   
   The full message is:
   
   ```
   [2018-11-22T10:21:41.559Z][FATAL]-lambda_runtime.py:108,Failed to import handler function "greengrassHelloWorldCounter.function_handler" due to exception: simple_bind error. Arguments:
   [2018-11-22T10:21:41.559Z][FATAL]-data: (1, 3, 128, 128)
   [2018-11-22T10:21:41.559Z][FATAL]-[10:21:41] src/storage/storage.cc:119: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading CUDA: no CUDA-capable device is detected
   [2018-11-22T10:21:41.559Z][FATAL]-Stack trace returned 10 entries:
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (0) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x58) [0x7f786c6280]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (1) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x44) [0x7f786c6bdc]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (2) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x2ce60c8) [0x7f7a8480c8]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (3) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x2ce73a4) [0x7f7a8493a4]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (4) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::StorageImpl::Alloc(mxnet::Storage::Handle*)+0x64) [0x7f7a84a58c]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (5) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::exec::InitZeros(mxnet::NDArrayStorageType, nnvm::TShape const&, mxnet::Context const&, int)+0x3cc) [0x7f7a36db5c]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (6) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x27fab9c) [0x7f7a35cb9c]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (7) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::exec::GraphExecutor::InitArguments(nnvm::IndexedGraph const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, mxnet::Executor const*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*)+0xaa0) [0x7f7a3601c8]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (8) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::exec::GraphExecutor::Init(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*, std::unordered_map<nnvm::NodeEntry, mxnet::NDArray, nnvm::NodeEntryHash, nnvm::NodeEntryEqual, std::allocator<std::pair<nnvm::NodeEntry const, mxnet::NDArray> > > const&)+0x748) [0x7f7a366b90]
   [2018-11-22T10:21:41.559Z][FATAL]-[bt] (9) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::Executor::SimpleBind(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > 
const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*)+0x10c) [0x7f7a367314]
   ```
   
   A simple MNIST MLP training script runs fine and uses the GPU correctly.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services