You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/08/26 23:11:55 UTC

[GitHub] [incubator-mxnet] Zha0q1 commented on issue #19022: MKL numpy rnn core dump

Zha0q1 commented on issue #19022:
URL: https://github.com/apache/incubator-mxnet/issues/19022#issuecomment-681170106


   GDB backtrace for the run that fails with the following error:
   ```
   ubuntu@ip-172-31-38-169:~/incubator-mxnet$ python rnn.py 
   [22:40:24] ../src/storage/storage.cc:198: Using Pooled (Naive) StorageManager for CPU
   corrupted size vs. prev_size
   Aborted (core dumped)
   ```
   corrupted size vs. prev_size
   
   Thread 21 "python" received signal SIGABRT, Aborted.
   [Switching to Thread 0x7fff692dd700 (LWP 78491)]
   __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
   51	../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
   (gdb) bt
   #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
   #1  0x00007ffff78058b1 in __GI_abort () at abort.c:79
   #2  0x00007ffff784e907 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff797bdfa "%s\n")
       at ../sysdeps/posix/libc_fatal.c:181
   #3  0x00007ffff785597a in malloc_printerr (str=str@entry=0x7ffff7979efd "corrupted size vs. prev_size")
       at malloc.c:5350
   #4  0x00007ffff7855b7c in malloc_consolidate (av=av@entry=0x7fff4c000020) at malloc.c:4456
   #5  0x00007ffff7859848 in _int_malloc (av=av@entry=0x7fff4c000020, bytes=bytes@entry=5120) at malloc.c:3703
   #6  0x00007ffff785a55b in _int_memalign (av=0x7fff4c000020, alignment=64, bytes=<optimized out>)
       at malloc.c:4694
   #7  0x00007ffff786002a in _mid_memalign (address=<optimized out>, bytes=5016, alignment=<optimized out>)
       at malloc.c:3314
   #8  __posix_memalign (memptr=0x7fff692da9c0, alignment=<optimized out>, size=5016) at malloc.c:5369
   #9  0x00007fffe64adad2 in dnnl::impl::malloc (size=5016, alignment=64)
       at ../3rdparty/mkldnn/src/common/utils.cpp:99
   #10 0x00007fffe63e4ff8 in dnnl::impl::c_compatible::operator new (sz=5016)
       at ../3rdparty/mkldnn/src/common/nstl.hpp:40
   #11 0x00007fffe69cd594 in dnnl::impl::cpu::jit_uni_reorder_t::pd_t::create (reorder_pd=0x7fff692daef8, 
       engine=0x555557207880, attr=0x7fff4c086fc0, src_engine=0x555557207880, src_md=0x7fff692daf20, 
       dst_engine=0x555557207880, dst_md=0x7fff692db1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1086
   #12 0x00007fffe69c5e76 in dnnl::impl::cpu::jit_uni_reorder_create (reorder_pd=0x7fff692daef8, 
       engine=0x555557207880, attr=0x7fff4c086fc0, src_engine=0x555557207880, src_md=0x7fff692daf20, 
       dst_engine=0x555557207880, dst_md=0x7fff692db1e0) at ../3rdparty/mkldnn/src/cpu/jit_uni_reorder.cpp:1247
   #13 0x00007fffe64a1e8f in dnnl_reorder_primitive_desc_create (reorder_pd=0x7fff692daef8, 
       src_md=0x7fff692daf20, src_engine=0x555557207880, dst_md=0x7fff692db1e0, dst_engine=0x555557207880, 
       attr=0x7fff4c086fc0) at ../3rdparty/mkldnn/src/common/reorder.cpp:73
   #14 0x00007fffdb0a263c in dnnl::reorder::primitive_desc::primitive_desc (this=0x7fff692db4e0, src=..., 
       dst=..., attr=...) at ../3rdparty/mkldnn/include/dnnl.hpp:3166
   #15 0x00007fffdb0a274e in dnnl::reorder::reorder (this=0x7fff692db540, src=..., dst=..., attr=...)
       at ../3rdparty/mkldnn/include/dnnl.hpp:3217
   #16 0x00007fffdbfe3213 in mxnet::op::MKLDNNMemoryReorder (src=..., dst=...)
       at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:399
   #17 0x00007fffdbfda681 in mxnet::op::MKLDNNRnnBackward::SetNativeWeightsGrads (this=0x7fff4c077390)
       at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:867
   #18 0x00007fffdbfe34f3 in mxnet::op::RegisterMKLDNNRnn<mxnet::op::MKLDNNRnnBackward> (rnn=...)
       at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:991
   
   #19 0x00007fffdbfde69f in mxnet::op::MKLDNNRnnOp::Backward (this=0x55555731fbf0, ctx=..., 
       inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...}, 
       outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/nn/mkldnn/mkldnn_rnn.cc:1203
   #20 0x00007fffe31da173 in mxnet::op::RNNStatefulGradComputeExCPU (state_ptr=..., ctx=..., 
       inputs=std::vector of length 5, capacity 5 = {...}, req=std::vector of length 3, capacity 3 = {...}, 
       outputs=std::vector of length 3, capacity 3 = {...}) at ../src/operator/rnn.cc:284
   #21 0x00007fffdac7f26b in std::_Function_handler<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&), void (*)(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::_M_invoke(std::_Any_data const&, mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) (__functor=..., __args#0=..., __args#1=..., __args#2=std::vector of length 5, capacity 5 = {...}, 
       __args#3=std::vector of length 3, capacity 3 = {...}, 
       __args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:316
   #22 0x00007fffdadcd088 in std::function<void (mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&)>::operator()(mxnet::OpStatePtr const&, mxnet::OpContext const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&) const (this=0x555557295ce8, __args#0=..., __args#1=..., 
       __args#2=std::vector of length 5, capacity 5 = {...}, 
       __args#3=std::vector of length 3, capacity 3 = {...}, 
       __args#4=std::vector of length 3, capacity 3 = {...}) at /usr/include/c++/7/bits/std_function.h:706
   #23 0x00007fffdae42905 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (__closure=0x555557295be0, rctx=..., on_complete=...)
       at ../src/imperative/./imperative_utils.h:758
   #24 0x00007fffdae42b02 in mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}::operator()(mxnet::RunContext) const (
       __closure=0x555557295be0, rctx=...) at ../src/imperative/./imperative_utils.h:772
   #25 0x00007fffdae48f9e in std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushOperator(mxnet::OpStatePtr const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, mxnet::DispatchMode)::{lambda(mxnet::RunContext)#2}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&) (__functor=..., __args#0=...)
       at /usr/include/c++/7/bits/std_function.h:316
   #26 0x00007fffdad9f5a2 in std::function<void (mxnet::RunContext)>::operator()(mxnet::RunContext) const (
       this=0x555557194200, __args#0=...) at /usr/include/c++/7/bits/std_function.h:706
   #27 0x00007fffdada8260 in mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (
       __closure=0x5555572dab50, ctx=..., on_complete=...) at ../src/engine/./threaded_engine.h:537
   #28 0x00007fffdadac835 in std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&) (__functor=..., 
       __args#0=..., __args#1=...) at /usr/include/c++/7/bits/std_function.h:316
   #29 0x00007fffdada03d0 in std::function<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete)>::operator()(mxnet::RunContext, mxnet::engine::CallbackOnComplete) const (this=0x55555731c630, __args#0=..., 
       __args#1=...) at /usr/include/c++/7/bits/std_function.h:706
   #30 0x00007fffdadb4fd4 in mxnet::engine::ThreadedEngine::ExecuteOprBlock (this=0x555557319ac0, run_ctx=..., 
       opr_block=0x55555731e1b8) at ../src/engine/./threaded_engine.h:381
   #31 0x00007fffdadb8964 in mxnet::engine::ThreadedEnginePerDevice::CPUWorker<(dmlc::ConcurrentQueueType)0> (
       this=0x555557319ac0, ctx=..., block=0x5555563ad5c0, 
       ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
       at ../src/engine/threaded_engine_perdevice.cc:304
   #32 0x00007fffdadb62f4 in mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}::operator()(dmlc::ManualEvent) const (__closure=0x55555731bf40, 
       ready_event=std::shared_ptr<dmlc::ManualEvent> (use count 2, weak count 0) = {...})
       at ../src/engine/threaded_engine_perdevice.cc:120
   #33 0x00007fffdadbecc0 in std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&) (__functor=..., __args#0=...) at /usr/include/c++/7/bits/std_function.h:316
   #34 0x00007fffdadbd927 in std::function<void (std::shared_ptr<dmlc::ManualEvent>)>::operator()(std::shared_ptr<dmlc::ManualEvent>) const (this=0x55555724b428, __args#0=std::shared_ptr<dmlc::ManualEvent> (empty) = {...})
       at /usr/include/c++/7/bits/std_function.h:706
   #35 0x00007fffdadbb605 in std::__invoke_impl<void, std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::__invoke_other, std::function<void (std::shared_ptr<dmlc::ManualEvent>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__f=...) at /usr/include/c++/7/bits/invoke.h:60
   #36 0x00007fffdadb7643 in std::__invoke<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> >(std::function<void (std::shared_ptr<dmlc::ManualEvent>)>&&, std::shared_ptr<dmlc::ManualEvent>&&) (__fn=...) at /usr/include/c++/7/bits/invoke.h:95
   #37 0x00007fffdadc4fc9 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>) (
       this=0x55555724b418) at /usr/include/c++/7/thread:234
   #38 0x00007fffdadc4f31 in std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > >::operator()() (this=0x55555724b418)
       at /usr/include/c++/7/thread:243
   #39 0x00007fffdadc4ed0 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run() (this=0x55555724b410)
       at /usr/include/c++/7/thread:186
   #40 0x00007fffd3e196df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #41 0x00007ffff7bbd6db in start_thread (arg=0x7fff692dd700) at pthread_create.c:463
   #42 0x00007ffff78e6a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org