You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Mostafa Mokhtar (JIRA)" <ji...@apache.org> on 2017/04/18 00:53:42 UTC

[jira] [Created] (IMPALA-5221) Crash in SaslMutexLock(void*) while running shuffle join on 678 nodes

Mostafa Mokhtar created IMPALA-5221:
---------------------------------------

             Summary: Crash in SaslMutexLock(void*) while running shuffle join on 678 nodes
                 Key: IMPALA-5221
                 URL: https://issues.apache.org/jira/browse/IMPALA-5221
             Project: IMPALA
          Issue Type: Bug
          Components: Distributed Exec
    Affects Versions: Impala 2.7.0
            Reporter: Mostafa Mokhtar
            Priority: Critical


Query that was running when the crash occurred:
{code}
select /* +straight_join */  count(*) 
from store_sales a join   /* +shuffle */ 
     store_returns b on 
a.ss_item_sk = b.sr_item_sk 
   where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 2450816 and 2451500  and sr_returned_date_sk between 2450816 and 2451500
   group by a.ss_ticket_number 
   having count(*) > 9999999999
{code}

Stack trace of the crashing thread:
{code}
#0  0x00007fa373e765f7 in raise () from /lib64/libc.so.6
#1  0x00007fa373e77ce8 in abort () from /lib64/libc.so.6
#2  0x00007fa375f64a55 in os::abort(bool) () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#3  0x00007fa3760e4f87 in VMError::report_and_die() () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#4  0x00007fa375f6996f in JVM_handle_linux_signal () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#5  <signal handler called>
#6  0x00007fa37420cbd0 in pthread_mutex_lock () from /lib64/libpthread.so.0
#7  0x00000000009d3548 in impala::(anonymous namespace)::SaslMutexLock(void*) ()
#8  0x00007fa370fe1439 in gssapi_client_mech_step () from /usr/lib64/sasl2/libgssapiv2.so
#9  0x00007fa376c187f5 in sasl_client_step () from /lib64/libsasl2.so.3
#10 0x0000000000b4b4d8 in sasl::TSaslClient::evaluateChallengeOrResponse(unsigned char const*, unsigned int, unsigned int*) ()
#11 0x0000000000b4ee8e in apache::thrift::transport::TSaslTransport::open() ()
#12 0x0000000000da36fa in impala::ThriftClientImpl::Open() ()
#13 0x0000000000da3b02 in impala::ThriftClientImpl::OpenWithRetry(unsigned int, unsigned long) ()
#14 0x0000000000a4b55b in impala::ClientCacheHelper::CreateClient(impala::TNetworkAddress const&, boost::function<impala::ThriftClientImpl* (impala::TNetworkAddress const&, void**)>, void**) ()
#15 0x0000000000a4bb33 in impala::ClientCacheHelper::GetClient(impala::TNetworkAddress const&, boost::function<impala::ThriftClientImpl* (impala::TNetworkAddress const&, void**)>, void**) ()
#16 0x0000000000dc6f36 in impala::DataStreamSender::Channel::TransmitDataHelper(impala::TRowBatch const*) ()
#17 0x0000000000dc74f1 in impala::DataStreamSender::Channel::TransmitData(int, impala::TRowBatch const*) ()
#18 0x0000000000dc82bd in impala::ThreadPool<impala::TRowBatch*>::WorkerThread(int) ()
#19 0x0000000000bd4279 in impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) ()
#20 0x0000000000bd4c54 in boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() ()
#21 0x0000000000e1e0ea in thread_proxy ()
#22 0x00007fa37420adc5 in start_thread () from /lib64/libpthread.so.0
#23 0x00007fa373f37ced in clone () from /lib64/libc.so.6
{code}

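It seems that the mutex pointer {{m}} passed to SaslMutexLock() is invalid: the crash happens inside pthread_mutex_lock() (frame #6), which is called from SaslMutexLock() (frame #7).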
{code}
// Mutex-lock callback that receives the mutex as an opaque void*. In the stack
// trace above it is invoked from gssapi_client_mech_step() in libgssapiv2.so.
//
// m: pointer to the mutex to lock; it is cast to mutex* and locked.
// Returns 0 unconditionally.
//
// NOTE(review): m is dereferenced without any null or validity check, so an
// invalid pointer faults inside lock() (pthread_mutex_lock() in the trace).
// Presumably m must come from the matching mutex-allocation callback and still
// be alive at this point -- confirm against the caller.
static int SaslMutexLock(void* m) {
  static_cast<mutex*>(m)->lock();
  return 0; // indicates success.
}
{code}

Exec summary of the query:
{code}
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
| Operator        | #Hosts | Avg Time | Max Time | #Rows   | Est. #Rows | Peak Mem  | Est. Peak Mem | Detail                                |
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
| 08:EXCHANGE     | 1      | 0ns      | 0ns      | 0       | 2.46B      | 0 B       | -1 B          | UNPARTITIONED                         |
| 07:AGGREGATE    | 678    | 1.05ms   | 8.84ms   | 0       | 2.46B      | 2.27 MB   | 609.31 MB     | FINALIZE                              |
| 06:EXCHANGE     | 678    | 0ns      | 0ns      | 0       | 24.61B     | 0 B       | 0 B           | HASH(a.ss_ticket_number)              |
| 03:AGGREGATE    | 678    | 340.80us | 1.22ms   | 0       | 24.61B     | 1.27 MB   | 609.31 MB     | STREAMING                             |
| 02:HASH JOIN    | 678    | 95.29ms  | 38.66s   | 0       | 26.36B     | 137.09 MB | 195.17 MB     | INNER JOIN, PARTITIONED               |
| |--05:EXCHANGE  | 678    | 735.21us | 20.47ms  | 13.97M  | 7.88B      | 0 B       | 0 B           | HASH(b.sr_item_sk,b.sr_ticket_number) |
| |  01:SCAN HDFS | 674    | 474.54ms | 1.04s    | 713.00M | 7.88B      | 206.65 MB | 176.00 MB     | tpcds_100000_parquet.store_returns b  |
| 04:EXCHANGE     | 678    | 33.35us  | 384.76us | 0       | 96.23B     | 0 B       | 0 B           | HASH(a.ss_item_sk,a.ss_ticket_number) |
| 00:SCAN HDFS    | 678    | 1.10s    | 1.71s    | 705.58M | 96.23B     | 926.86 MB | 176.00 MB     | tpcds_100000_parquet.store_sales a    |
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
{code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)