You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Mostafa Mokhtar (JIRA)" <ji...@apache.org> on 2018/04/02 03:56:00 UTC

[jira] [Created] (IMPALA-6787) On large secure clusters the connection setup thread becomes a bottleneck at warmup and causes occasional timeout failures

Mostafa Mokhtar created IMPALA-6787:
---------------------------------------

             Summary: On large secure clusters the connection setup thread becomes a bottleneck at warmup and causes occasional timeout failures
                 Key: IMPALA-6787
                 URL: https://issues.apache.org/jira/browse/IMPALA-6787
             Project: IMPALA
          Issue Type: Bug
          Components: Distributed Exec
    Affects Versions: Impala 2.12.0
            Reporter: Mostafa Mokhtar


On clusters with 200+ nodes, a single connection setup thread is not sufficient and ends up being a bottleneck for a while, which appears to cause queries to fail with:
{code}
I0401 20:20:55.032140 1806361 thrift-util.cc:123] TSocket::open() connect() <Host: va1007.foo.com Port: 22000>Connection timed out
I0401 20:20:55.032346 1806361 thrift-client.cc:78] Couldn't open transport for va1007.foo.com:22000 (connect() failed: Connection timed out)
I0401 20:20:55.032364 1806361 thrift-client.cc:94] Unable to connect to va1007.foo.com:22000
{code}

{code}
// Only using one thread here is sufficient for performance, and it avoids potential
  // thread safety issues with the thrift code called in SetupConnection.
  constexpr int CONNECTION_SETUP_POOL_SIZE = 1;

  // New - this is the thread pool used to process the internal accept queue.
  ThreadPool<shared_ptr<TTransport>> connection_setup_pool("setup-server", "setup-worker",
      CONNECTION_SETUP_POOL_SIZE, FLAGS_accepted_cnxn_queue_depth,
      [this](int tid, const shared_ptr<TTransport>& item) {
        this->SetupConnection(item);
      });
{code}

{code}
#0  0x00007fd927de8e20 in krb5int_MD5Update () from /lib64/libk5crypto.so.3
#1  0x00007fd927de7bca in k5_md5_hash () from /lib64/libk5crypto.so.3
#2  0x00007fd927e01e32 in krb5int_hmac_keyblock () from /lib64/libk5crypto.so.3
#3  0x00007fd927dfc448 in usage_key.isra.2 () from /lib64/libk5crypto.so.3
#4  0x00007fd927dfc9fc in krb5int_arcfour_decrypt () from /lib64/libk5crypto.so.3
#5  0x00007fd927df97e4 in krb5_k_decrypt () from /lib64/libk5crypto.so.3
#6  0x00007fd927df98bd in krb5_c_decrypt () from /lib64/libk5crypto.so.3
#7  0x00007fd9297191fb in rd_req_decoded_opt () from /lib64/libkrb5.so.3
#8  0x00007fd92971a1da in krb5_rd_req_decoded () from /lib64/libkrb5.so.3
#9  0x00007fd9282371df in kg_accept_krb5 () from /lib64/libgssapi_krb5.so.2
#10 0x00007fd9282388ca in krb5_gss_accept_sec_context_ext () from /lib64/libgssapi_krb5.so.2
#11 0x00007fd928238a29 in krb5_gss_accept_sec_context () from /lib64/libgssapi_krb5.so.2
#12 0x00007fd92822607a in gss_accept_sec_context () from /lib64/libgssapi_krb5.so.2
#13 0x00007fd92653aedc in gssapi_server_mech_step () from /usr/lib64/sasl2/libgssapiv2.so
#14 0x00007fd92bc27b9b in sasl_server_step () from /lib64/libsasl2.so.3
#15 0x0000000000caf3b1 in sasl::TSaslServer::evaluateChallengeOrResponse(unsigned char const*, unsigned int, unsigned int*) ()
#16 0x0000000000cb3040 in apache::thrift::transport::TSaslTransport::doSaslNegotiation() ()
#17 0x0000000000cb1488 in apache::thrift::transport::TSaslServerTransport::Factory::getTransport(boost::shared_ptr<apache::thrift::transport::TTransport>) ()
#18 0x0000000000b143c7 in apache::thrift::server::TAcceptQueueServer::SetupConnection(boost::shared_ptr<apache::thrift::transport::TTransport>) ()
#19 0x0000000000b14eb2 in boost::detail::function::void_function_obj_invoker2<apache::thrift::server::TAcceptQueueServer::serve()::{lambda(int, boost::shared_ptr<apache::thrift::transport::TTransport> const&)#1}, void, int, boost::shared_ptr<apache::thrift::transport::TTransport> const&>::invoke(boost::detail::function::function_buffer&, int, boost::shared_ptr<apache::thrift::transport::TTransport> const&) ()
#20 0x0000000000b17d79 in impala::ThreadPool<boost::shared_ptr<apache::thrift::transport::TTransport> >::WorkerThread(int) ()
#21 0x0000000000d6049f in impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long>*) ()
#22 0x0000000000d60c9a in boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long>*> > > >::run() ()
#23 0x00000000012d794a in thread_proxy ()
#24 0x00007fd928c7ddc5 in start_thread () from /lib64/libpthread.so.0
#25 0x00007fd9289aaced in clone () from /lib64/libc.so.6
{code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)