You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Tim Armstrong (JIRA)" <ji...@apache.org> on 2018/01/11 21:30:04 UTC

[jira] [Created] (IMPALA-6389) Crash when querying table with "\0" as a row delimiter

Tim Armstrong created IMPALA-6389:
-------------------------------------

             Summary: Crash when querying table with "\0" as a row delimiter
                 Key: IMPALA-6389
                 URL: https://issues.apache.org/jira/browse/IMPALA-6389
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
    Affects Versions: Impala 2.10.0, Impala 2.9.0, Impala 2.8.0, Impala 2.11.0
            Reporter: Tim Armstrong
            Priority: Blocker


A user reported this bug here: http://community.cloudera.com/t5/Interactive-Short-cycle-SQL/Impala-quot-Cancelled-due-to-unreachable-impalad-s-quot-when/m-p/63577#M4044?eid=1&aid=1

The following sequence causes an impalad crash:
{noformat}
create table tab_separated(id bigint, s string, n int, t timestamp, b boolean)
  row format delimited
  fields terminated by '\t' escaped by '\\' lines terminated by '\000'
  stored as textfile;
select * from tab_separated; -- Done. 0 results.
insert into tab_separated (id, s) values (100, ''); -- Success.
select * from tab_separated; -- 20 second delay before getting "Cancelled due to unreachable impalad(s): xxxx:22000"
{noformat}

{noformat}
(gdb) bt
#0  0x00007f90414371f7 in raise () from /lib64/libc.so.6
#1  0x00007f90414388e8 in abort () from /lib64/libc.so.6
#2  0x00007f9044105185 in os::abort(bool) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#3  0x00007f90442a7593 in VMError::report_and_die() () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#4  0x00007f904410a68f in JVM_handle_linux_signal () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#5  0x00007f9044100be3 in signalHandler(int, siginfo*, void*) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#6  <signal handler called>
#7  0x00007f904154e574 in __memcpy_ssse3_back () from /lib64/libc.so.6
#8  0x0000000000dc01a0 in impala::StringBuffer::Append (this=this@entry=0x723c5c0, str=0x722a00f "\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026", str_len=-1)
    at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/runtime/string-buffer.h:54
#9  0x0000000000dbdc05 in impala::HdfsTextScanner::ProcessRange (this=this@entry=0x723c400, row_batch=row_batch@entry=0x74157a0, num_tuples=num_tuples@entry=0x7f8fe4f3821c)
    at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-text-scanner.cc:397
#10 0x0000000000dbfdeb in impala::HdfsTextScanner::GetNextInternal (this=0x723c400, row_batch=0x74157a0)
    at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-text-scanner.cc:446
#11 0x0000000000d9d18d in impala::HdfsScanner::ProcessSplit (this=0x723c400) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scanner.cc:121
#12 0x0000000000d8e1d9 in impala::HdfsScanNode::ProcessSplit (this=0x5496300, filter_ctxs=..., expr_results_pool=0x8000, expr_results_pool@entry=0x7f8fe4f38780, 
    scan_range=0x64c8000) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:532
#13 0x0000000000d8fd65 in impala::HdfsScanNode::ScannerThread (this=0x5496300) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:442
#14 0x0000000000d90262 in operator() (__closure=0x7f8fe4f38c68) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:354
#15 boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourceMgr::ResourcePool*)::<lambda()>, void>::invoke(boost::detail::function::function_buffer &) (function_obj_ptr=...) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:153
#16 0x0000000000d19db3 in operator() (this=0x7f8fe4f38c60)
    at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
#17 impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) (name=..., category=..., functor=..., 
    thread_started=<optimized out>) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/util/thread.cc:356
#18 0x0000000000d1a544 in operator()<void (*)(const std::basic_string<char>&, const std::basic_string<char>&, boost::function<void()>, impala::Promise<long int>*), boost::_bi::list0> (f=@0x7f705b8: 0xd19b20 <impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*)>, a=<synthetic pointer>, 
    this=0x7f705c0) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/bind/bind.hpp:457
#19 operator() (this=0x7f705b8) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/bind/bind_template.hpp:20
#20 boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() (this=0x7f70400)
    at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/thread/detail/thread.hpp:116
#21 0x00000000012ad78a in thread_proxy ()
#22 0x00007f90417cce25 in start_thread () from /lib64/libpthread.so.0
#23 0x00007f90414fa34d in clone () from /lib64/libc.so.6
{noformat}

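It looks like col_start and byte_buffer_ptr_ are inconsistent: col_start (0x722a00f) points one byte past byte_buffer_ptr_ (0x722a00e), which matches the str_len=-1 seen in frame #8. That negative length is eventually passed into memcpy(), where it is converted to a huge unsigned number.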
{noformat}
(gdb) p col_start
$1 = 0x722a00f "\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026"
(gdb) p byte_buffer_ptr_
$2 = 0x722a00e "\006\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026"
{noformat}

The string contents shown in the gdb output above are leftover test data from the environment we were running on, not actual table data.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)