You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Mostafa Mokhtar (JIRA)" <ji...@apache.org> on 2017/04/18 00:53:42 UTC
[jira] [Created] (IMPALA-5221) Crash in SaslMutexLock(void*) while
running shuffle join on 678 nodes
Mostafa Mokhtar created IMPALA-5221:
---------------------------------------
Summary: Crash in SaslMutexLock(void*) while running shuffle join on 678 nodes
Key: IMPALA-5221
URL: https://issues.apache.org/jira/browse/IMPALA-5221
Project: IMPALA
Issue Type: Bug
Components: Distributed Exec
Affects Versions: Impala 2.7.0
Reporter: Mostafa Mokhtar
Priority: Critical
Query
{code}
select /* +straight_join */ count(*)
from store_sales a join /* +shuffle */
store_returns b on
a.ss_item_sk = b.sr_item_sk
where a.ss_ticket_number = b.sr_ticket_number and ss_sold_date_sk between 2450816 and 2451500 and sr_returned_date_sk between 2450816 and 2451500
group by a.ss_ticket_number
having count(*) > 9999999999
{code}
Stack
{code}
#0 0x00007fa373e765f7 in raise () from /lib64/libc.so.6
#1 0x00007fa373e77ce8 in abort () from /lib64/libc.so.6
#2 0x00007fa375f64a55 in os::abort(bool) () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#3 0x00007fa3760e4f87 in VMError::report_and_die() () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#4 0x00007fa375f6996f in JVM_handle_linux_signal () from /usr/java/jdk1.7.0_67-cloudera/jre/lib/amd64/server/libjvm.so
#5 <signal handler called>
#6 0x00007fa37420cbd0 in pthread_mutex_lock () from /lib64/libpthread.so.0
#7 0x00000000009d3548 in impala::(anonymous namespace)::SaslMutexLock(void*) ()
#8 0x00007fa370fe1439 in gssapi_client_mech_step () from /usr/lib64/sasl2/libgssapiv2.so
#9 0x00007fa376c187f5 in sasl_client_step () from /lib64/libsasl2.so.3
#10 0x0000000000b4b4d8 in sasl::TSaslClient::evaluateChallengeOrResponse(unsigned char const*, unsigned int, unsigned int*) ()
#11 0x0000000000b4ee8e in apache::thrift::transport::TSaslTransport::open() ()
#12 0x0000000000da36fa in impala::ThriftClientImpl::Open() ()
#13 0x0000000000da3b02 in impala::ThriftClientImpl::OpenWithRetry(unsigned int, unsigned long) ()
#14 0x0000000000a4b55b in impala::ClientCacheHelper::CreateClient(impala::TNetworkAddress const&, boost::function<impala::ThriftClientImpl* (impala::TNetworkAddress const&, void**)>, void**) ()
#15 0x0000000000a4bb33 in impala::ClientCacheHelper::GetClient(impala::TNetworkAddress const&, boost::function<impala::ThriftClientImpl* (impala::TNetworkAddress const&, void**)>, void**) ()
#16 0x0000000000dc6f36 in impala::DataStreamSender::Channel::TransmitDataHelper(impala::TRowBatch const*) ()
#17 0x0000000000dc74f1 in impala::DataStreamSender::Channel::TransmitData(int, impala::TRowBatch const*) ()
#18 0x0000000000dc82bd in impala::ThreadPool<impala::TRowBatch*>::WorkerThread(int) ()
#19 0x0000000000bd4279 in impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) ()
#20 0x0000000000bd4c54 in boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() ()
#21 0x0000000000e1e0ea in thread_proxy ()
#22 0x00007fa37420adc5 in start_thread () from /lib64/libpthread.so.0
#23 0x00007fa373f37ced in clone () from /lib64/libc.so.6
{code}
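It seems that m, the mutex pointer passed to SaslMutexLock() (frame #7), is invalid: the crash occurs in pthread_mutex_lock() when the function tries to lock it.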
{code}
static int SaslMutexLock(void* m) {
static_cast<mutex*>(m)->lock();
return 0; // indicates success.
}
{code}
Exec summary
{code}
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
| 08:EXCHANGE | 1 | 0ns | 0ns | 0 | 2.46B | 0 B | -1 B | UNPARTITIONED |
| 07:AGGREGATE | 678 | 1.05ms | 8.84ms | 0 | 2.46B | 2.27 MB | 609.31 MB | FINALIZE |
| 06:EXCHANGE | 678 | 0ns | 0ns | 0 | 24.61B | 0 B | 0 B | HASH(a.ss_ticket_number) |
| 03:AGGREGATE | 678 | 340.80us | 1.22ms | 0 | 24.61B | 1.27 MB | 609.31 MB | STREAMING |
| 02:HASH JOIN | 678 | 95.29ms | 38.66s | 0 | 26.36B | 137.09 MB | 195.17 MB | INNER JOIN, PARTITIONED |
| |--05:EXCHANGE | 678 | 735.21us | 20.47ms | 13.97M | 7.88B | 0 B | 0 B | HASH(b.sr_item_sk,b.sr_ticket_number) |
| | 01:SCAN HDFS | 674 | 474.54ms | 1.04s | 713.00M | 7.88B | 206.65 MB | 176.00 MB | tpcds_100000_parquet.store_returns b |
| 04:EXCHANGE | 678 | 33.35us | 384.76us | 0 | 96.23B | 0 B | 0 B | HASH(a.ss_item_sk,a.ss_ticket_number) |
| 00:SCAN HDFS | 678 | 1.10s | 1.71s | 705.58M | 96.23B | 926.86 MB | 176.00 MB | tpcds_100000_parquet.store_sales a |
+-----------------+--------+----------+----------+---------+------------+-----------+---------------+---------------------------------------+
{code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)