You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/11 11:00:43 UTC

[GitHub] lovehuanhuan opened a new issue #10905: Why can't I run a GPU model on Jetson TX2?

lovehuanhuan opened a new issue #10905: Why can't I run a GPU model on Jetson TX2?
URL: https://github.com/apache/incubator-mxnet/issues/10905
 
 
   I compiled MXNet with GPU support on a Jetson TX2 successfully. When I run `example/image-classification/train_mnist.py` on the CPU, it works fine, but when I run it with `gpus='0,1,2,3'`, I get the following errors:
   
   
   root@tegra-ubuntu:/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/example/image-classification# python train_mnist.py
   INFO:root:start with arguments Namespace(add_stn=False, batch_size=64, disp_batches=100, dtype='float32', gc_threshold=0.5, gc_type='none', gpus='0,1,2,3', initializer='default', kv_store='device', load_epoch=None, loss='', lr=0.05, lr_factor=0.1, lr_step_epochs='10', macrobatch_size=0, model_prefix=None, mom=0.9, monitor=0, network='mlp', num_classes=10, num_epochs=20, num_examples=60000, num_layers=None, optimizer='sgd', test_io=0, top_k=0, warmup_epochs=5, warmup_strategy='linear', wd=0.0001)
   train_mnist.py:38: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
     label = np.fromstring(flbl.read(), dtype=np.int8)
   train_mnist.py:41: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
     image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
   Traceback (most recent call last):
     File "train_mnist.py", line 96, in <module>
       fit.fit(args, sym, get_mnist_iter)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/example/image-classification/common/fit.py", line 307, in fit
       monitor=monitor)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/base_module.py", line 484, in fit
       for_training=True, force_rebind=force_rebind)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/module.py", line 430, in bind
       state_names=self._state_names)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 265, in __init__
       self.bind_exec(data_shapes, label_shapes, shared_group)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 361, in bind_exec
       shared_group))
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 639, in _bind_ith_exec
       shared_buffer=shared_data_arrays, **input_shapes)
     File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/symbol/symbol.py", line 1524, in simple_bind
       raise RuntimeError(error_msg)
   RuntimeError: simple_bind error. Arguments:
   data: (16, 1L, 28L, 28L)
   softmax_label: (16,)
   [18:59:00] src/storage/storage.cc:65: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading CUDA: invalid device ordinal
   
   Stack trace returned 10 entries:
   [bt] (0) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x54) [0x7f63dbcd44]
   [bt] (1) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x44) [0x7f63dbd5fc]
   [bt] (2) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::StorageImpl::ActivateDevice(mxnet::Context)+0x148) [0x7f65f78bf8]
   [bt] (3) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::StorageImpl::Alloc(mxnet::Storage::Handle*)+0x6c) [0x7f65f729d4]
   [bt] (4) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::InitZeros(mxnet::NDArrayStorageType, nnvm::TShape const&, mxnet::Context const&, int)+0x3cc) [0x7f65ff0a0c]
   [bt] (5) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(+0x2de5e14) [0x7f65fe2e14]
   [bt] (6) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::GraphExecutor::InitArguments(nnvm::IndexedGraph const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, mxnet::Executor const*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*)+0xaa0) [0x7f65fe6440]
   [bt] (7) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::GraphExecutor::Init(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*, std::unordered_map<nnvm::NodeEntry, mxnet::NDArray, nnvm::NodeEntryHash, nnvm::NodeEntryEqual, std::allocator<std::pair<nnvm::NodeEntry const, mxnet::NDArray> > > const&)+0x748) [0x7f65fecce8]
   [bt] (8) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::Executor::SimpleBind(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*)+0x10c) [0x7f65fed46c]
   [bt] (9) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(MXExecutorSimpleBind+0x23b0) [0x7f65f85d30]
   
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services