You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/11 11:00:43 UTC
[GitHub] lovehuanhuan opened a new issue #10905: Why I can't run gpu model
on jeston TX2?
lovehuanhuan opened a new issue #10905: Why I can't run gpu model on jeston TX2?
URL: https://github.com/apache/incubator-mxnet/issues/10905
I compiled mxnet with gpu on jeston tx2 successfully, when i run #example/image-classification/train_mnist.py # with cpu,it's ok, but when i run it with #gpu='0,1,2,3'#, I got errors
root@tegra-ubuntu:/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/example/image-classification# python train_mnist.py
INFO:root:start with arguments Namespace(add_stn=False, batch_size=64, disp_batches=100, dtype='float32', gc_threshold=0.5, gc_type='none', gpus='0,1,2,3', initializer='default', kv_store='device', load_epoch=None, loss='', lr=0.05, lr_factor=0.1, lr_step_epochs='10', macrobatch_size=0, model_prefix=None, mom=0.9, monitor=0, network='mlp', num_classes=10, num_epochs=20, num_examples=60000, num_layers=None, optimizer='sgd', test_io=0, top_k=0, warmup_epochs=5, warmup_strategy='linear', wd=0.0001)
train_mnist.py:38: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
label = np.fromstring(flbl.read(), dtype=np.int8)
train_mnist.py:41: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
Traceback (most recent call last):
File "train_mnist.py", line 96, in <module>
fit.fit(args, sym, get_mnist_iter)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/example/image-classification/common/fit.py", line 307, in fit
monitor=monitor)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/base_module.py", line 484, in fit
for_training=True, force_rebind=force_rebind)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/module.py", line 430, in bind
state_names=self._state_names)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 265, in __init__
self.bind_exec(data_shapes, label_shapes, shared_group)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 361, in bind_exec
shared_group))
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/module/executor_group.py", line 639, in _bind_ith_exec
shared_buffer=shared_data_arrays, **input_shapes)
File "/home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/symbol/symbol.py", line 1524, in simple_bind
raise RuntimeError(error_msg)
RuntimeError: simple_bind error. Arguments:
data: (16, 1L, 28L, 28L)
softmax_label: (16,)
[18:59:00] src/storage/storage.cc:65: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading CUDA: invalid device ordinal
Stack trace returned 10 entries:
[bt] (0) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x54) [0x7f63dbcd44]
[bt] (1) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x44) [0x7f63dbd5fc]
[bt] (2) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::StorageImpl::ActivateDevice(mxnet::Context)+0x148) [0x7f65f78bf8]
[bt] (3) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::StorageImpl::Alloc(mxnet::Storage::Handle*)+0x6c) [0x7f65f729d4]
[bt] (4) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::InitZeros(mxnet::NDArrayStorageType, nnvm::TShape const&, mxnet::Context const&, int)+0x3cc) [0x7f65ff0a0c]
[bt] (5) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(+0x2de5e14) [0x7f65fe2e14]
[bt] (6) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::GraphExecutor::InitArguments(nnvm::IndexedGraph const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, mxnet::Executor const*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*)+0xaa0) [0x7f65fe6440]
[bt] (7) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::exec::GraphExecutor::Init(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*, std::unordered_map<nnvm::NodeEntry, mxnet::NDArray, nnvm::NodeEntryHash, nnvm::NodeEntryEqual, std::allocator<std::pair<nnvm::NodeEntry const, mxnet::NDArray> > > const&)+0x748) [0x7f65fecce8]
[bt] (8) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::Executor::SimpleBind(nnvm::Symbol, mxnet::Context const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::Context, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::Context> > > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::vector<mxnet::Context, std::allocator<mxnet::Context> > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, nnvm::TShape, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, nnvm::TShape> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::unordered_set<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >*, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, mxnet::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, mxnet::NDArray> > >*, mxnet::Executor*)+0x10c) [0x7f65fed46c]
[bt] (9) /home/nvidia/zhuzhipeng/mxnetTX2/mxnet/python/mxnet/../../lib/libmxnet.so(MXExecutorSimpleBind+0x23b0) [0x7f65f85d30]
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services