You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Zoram Thanga (JIRA)" <ji...@apache.org> on 2018/09/05 20:36:00 UTC

[jira] [Commented] (IMPALA-6764) Codegend UnionNode::MaterializeBatch() causes memory corruption crash of Impalad

    [ https://issues.apache.org/jira/browse/IMPALA-6764?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16604907#comment-16604907 ] 

Zoram Thanga commented on IMPALA-6764:
--------------------------------------

IMPALA-6059 changed the implementation of StringFunctions::Trim() enough that this crash is no longer reproducible.

Marking this as fixed by the above-mentioned JIRA.

> Codegend UnionNode::MaterializeBatch() causes memory corruption crash of Impalad
> --------------------------------------------------------------------------------
>
>                 Key: IMPALA-6764
>                 URL: https://issues.apache.org/jira/browse/IMPALA-6764
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>    Affects Versions: Impala 2.11.0
>            Reporter: Zoram Thanga
>            Assignee: Zoram Thanga
>            Priority: Critical
>         Attachments: bad-materializebatch-disasm.txt, good-materializebatch-disasm.txt
>
>
> A CTAS statement involving UNION ALL with LEFT JOIN children is reliably crashing with a stack trace similar to the following:
> {noformat}
> (gdb) bt
> #0  0x00007fb85fdf11f7 in raise () from ./debug-stuff/lib64/libc.so.6
> #1  0x00007fb85fdf28e8 in abort () from ./debug-stuff/lib64/libc.so.6
> #2  0x00007fb862106f35 in os::abort(bool) () from ./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
> #3  0x00007fb8622aaf33 in VMError::report_and_die() () from ./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
> #4  0x00007fb86210d22f in JVM_handle_linux_signal () from ./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
> #5  0x00007fb862103253 in signalHandler(int, siginfo*, void*) () from ./debug-stuff/usr/java/jdk1.8.0_162/jre/lib/amd64/server/libjvm.so
> #6  <signal handler called>
> #7  0x00007fb85ff08706 in __memcpy_ssse3_back () from ./debug-stuff/lib64/libc.so.6
> #8  0x00007fb840700d73 in impala::UnionNode::MaterializeBatch(impala::RowBatch*, unsigned char**) [clone .588] ()
> #9  0x0000000001001806 in impala::UnionNode::GetNextMaterialized (this=this@entry=0x8280000, state=state@entry=0x848ed00, row_batch=row_batch@entry=0xcef9950)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/union-node.cc:228
> #10 0x0000000001001b5c in impala::UnionNode::GetNext (this=0x8280000, state=0x848ed00, row_batch=0xcef9950, eos=0x7fb7fe9a987e)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/exec/union-node.cc:294
> #11 0x0000000000b724d2 in impala::FragmentInstanceState::ExecInternal (this=this@entry=0x4c030c0)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/fragment-instance-state.cc:270
> #12 0x0000000000b74e42 in impala::FragmentInstanceState::Exec (this=this@entry=0x4c030c0) at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/fragment-instance-state.cc:89
> #13 0x0000000000b64488 in impala::QueryState::ExecFInstance (this=0x8559200, fis=0x4c030c0) at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/runtime/query-state.cc:382
> #14 0x0000000000d13613 in boost::function0<void>::operator() (this=0x7fb7fe9a9c60)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
> #15 impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) (name=..., category=..., functor=..., 
>     thread_started=0x7fb7f999f0f0) at /usr/src/debug/impala-2.11.0-cdh5.14.0/be/src/util/thread.cc:352
> #16 0x0000000000d13d54 in boost::_bi::list4<boost::_bi::value<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void()> >, boost::_bi::value<impala::Promise<long int>*> >::operator()<void (*)(const std::basic_string<char>&, const std::basic_string<char>&, boost::function<void()>, impala::Promise<long int>*), boost::_bi::list0> (
>     f=@0x808bfb8: 0xd13460 <impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*)>, a=<synthetic pointer>, 
>     this=0x808bfc0) at /usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/bind/bind.hpp:457
> #17 boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > >::operator()() (this=0x808bfb8)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/bind/bind_template.hpp:20
> #18 boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() (this=0x808be00)
>     at /usr/src/debug/impala-2.11.0-cdh5.14.0/toolchain/boost-1.57.0-p3/include/boost/thread/detail/thread.hpp:116
> #19 0x000000000128e8ea in thread_proxy ()
> #20 0x00007fb860186e25 in start_thread () from ./debug-stuff/lib64/libpthread.so.0
> #21 0x00007fb85feb434d in clone () from ./debug-stuff/lib64/libc.so.6
> {noformat}
> The exact location and cause of the crash vary: sometimes we crash while accessing the source address of memcpy, other times while accessing the destination address. In this particular instance, we see:
> {noformat}
>    0x00007fb85ff086e4 <+6676>:  add    %rdx,%rsi
>    0x00007fb85ff086e7 <+6679>:  add    %rdx,%rdi
>    0x00007fb85ff086ea <+6682>:  lea    0x375df(%rip),%r11        # 0x7fb85ff3fcd0
>    0x00007fb85ff086f1 <+6689>:  movslq (%r11,%rdx,4),%rdx
>    0x00007fb85ff086f5 <+6693>:  lea    (%r11,%rdx,1),%rdx
>    0x00007fb85ff086f9 <+6697>:  jmpq   *%rdx
>    0x00007fb85ff086fb <+6699>:  ud2    
>    0x00007fb85ff086fd <+6701>:  nopl   (%rax)
>    0x00007fb85ff08700 <+6704>:  add    %rdx,%rsi
>    0x00007fb85ff08703 <+6707>:  add    %rdx,%rdi
> => 0x00007fb85ff08706 <+6710>:  movdqu -0x10(%rsi),%xmm0
>    0x00007fb85ff0870b <+6715>:  lea    -0x10(%rdi),%r8
>    0x00007fb85ff0870f <+6719>:  mov    %rdi,%r9
>    0x00007fb85ff08712 <+6722>:  and    $0xfffffffffffffff0,%rdi
>    0x00007fb85ff08716 <+6726>:  sub    %rdi,%r9
>    0x00007fb85ff08719 <+6729>:  sub    %r9,%rsi
>    0x00007fb85ff0871c <+6732>:  sub    %r9,%rdx
>    0x00007fb85ff0871f <+6735>:  mov    0x26fb0a(%rip),%rcx        # 0x7fb860178230 <__x86_64_shared_cache_size_half>
>    0x00007fb85ff08726 <+6742>:  cmp    %rcx,%rdx
> {noformat}
> which suggests that the source address (%rsi) is corrupted.
> Setting DISABLE_CODEGEN=TRUE for the statement avoids the crash, which suggests that the generated code is somehow using invalid pointers.
> The crash has been reproduced on RHEL/CentOS 6 and 7.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org