You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Lars Volker (JIRA)" <ji...@apache.org> on 2018/11/30 23:03:00 UTC

[jira] [Commented] (IMPALA-6955) Timeout when starting test_query_expiration custom cluster

    [ https://issues.apache.org/jira/browse/IMPALA-6955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16705392#comment-16705392 ] 

Lars Volker commented on IMPALA-6955:
-------------------------------------

I think I've seen this again. The callstacks look alike:

{noformat}
#0  0x00007f3e989901f7 in raise () from /lib64/libc.so.6
#1  0x00007f3e989918e8 in abort () from /lib64/libc.so.6
#2  0x00007f3e9bb61185 in os::abort(bool) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#3  0x00007f3e9bd03593 in VMError::report_and_die() () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#4  0x00007f3e9bb6668f in JVM_handle_linux_signal () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#5  0x00007f3e9bb5cbe3 in signalHandler(int, siginfo*, void*) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
#6  <signal handler called>
#7  0x000000000195bfa2 in base::subtle::NoBarrier_CompareAndSwap (ptr=0x238, old_value=0, new_value=1) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/gutil/atomicops-internals-x86.h:85
#8  0x000000000195c022 in base::subtle::Acquire_CompareAndSwap (ptr=0x238, old_value=0, new_value=1) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/gutil/atomicops-internals-x86.h:138
#9  0x000000000195c33e in base::SpinLock::Lock (this=0x238) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/gutil/spinlock.h:74
#10 0x000000000195c3c8 in impala::SpinLock::lock (this=0x238) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/spinlock.h:34
#11 0x0000000001f6cca8 in impala::ScopedShardedMapRef<std::shared_ptr<impala::ClientRequestState> >::ScopedShardedMapRef (this=0x7f3e0a3aeb90, query_id=..., sharded_map=0x1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/sharded-query-map-util.h:99
#12 0x0000000001f61d28 in impala::ImpalaServer::GetClientRequestState (this=0xd272000, query_id=...) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/service/impala-server.cc:2348
#13 0x0000000001fe9e92 in impala::ImpalaHttpHandler::QuerySummaryHandler (this=0x8091f98, include_json_plan=true, include_summary=true, args=..., document=0x7f3e0a3af1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/service/impala-http-handler.cc:763
#14 0x0000000001febd1d in impala::ImpalaHttpHandler::<lambda(const auto:5&, auto:6*)>::operator()<std::map<std::basic_string<char>, std::basic_string<char> >, rapidjson::GenericDocument<rapidjson::UTF8<> > >(const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > &, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>, rapidjson::CrtAllocator> *) const (__closure=0xec4aa38, args=..., doc=0x7f3e0a3af1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/service/impala-http-handler.cc:132
#15 0x0000000001febd52 in boost::detail::function::void_function_obj_invoker2<impala::ImpalaHttpHandler::RegisterHandlers(impala::Webserver*)::<lambda(const auto:5&, auto:6*)>, void, const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<const std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>, rapidjson::CrtAllocator>*>::invoke(boost::detail::function::function_buffer &, const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > &, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>, rapidjson::CrtAllocator> *) (function_obj_ptr=..., a0=..., a1=0x7f3e0a3af1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:153
#16 0x000000000210d82c in boost::function2<void, std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const&, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator>, rapidjson::CrtAllocator>*>::operator() (this=0xec4aa30, a0=..., a1=0x7f3e0a3af1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
#17 0x000000000210b880 in impala::Webserver::RenderUrlWithTemplate (this=0xeef32c0, arguments=..., url_handler=..., output=0x7f3e0a3af7e0, content_type=0x7f3e0a3af95c) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/webserver.cc:446
#18 0x000000000210b41e in impala::Webserver::BeginRequestCallback (this=0xeef32c0, connection=0xecec000, request_info=0xecec000) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/webserver.cc:418
#19 0x000000000210aebf in impala::Webserver::BeginRequestCallbackStatic (connection=0xecec000) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/webserver.cc:377
#20 0x000000000211e5f6 in handle_request (conn=0xecec000) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/thirdparty/squeasel/squeasel.c:3874
#21 0x0000000002120879 in process_new_connection (conn=0xecec000) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/thirdparty/squeasel/squeasel.c:4544
#22 0x0000000002120f4d in worker_thread (thread_func_param=0xd60db80) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/thirdparty/squeasel/squeasel.c:4676
#23 0x00007f3e98d25e25 in start_thread () from /lib64/libpthread.so.0
#24 0x00007f3e98a5334d in clone () from /lib64/libc.so.6
{noformat}

I inspected the core file and found that the query id was c7473472c492fc55:b2aa511300000000. From the logs, that query was {{"select count\(*) from tpch.supplier"}} and ran from a custom cluster test. {{test_query_concurrency.py}} is the only one that uses tpch.supplier.

I checked the {{client_request_state_map_}} and it looks healthy, but empty:

{noformat}
(gdb) p &impala::ExecEnv::exec_env_->impala_server_->client_request_state_map_
$20 = (impala::ImpalaServer::ClientRequestStateMap *) 0xd2721c0
(gdb) p impala::ExecEnv::exec_env_->impala_server_->client_request_state_map_.shards_
... (an empty map)
{noformat}

However, in the stack trace it is broken. Observe how {{sharded_map}} points to {{0x1c0}}, suggesting that the ExecEnv returned a null pointer for ImpalaServer.
{noformat}
#11 0x0000000001f6cca8 in impala::ScopedShardedMapRef<std::shared_ptr<impala::ClientRequestState> >::ScopedShardedMapRef (this=0x7f3e0a3aeb90, query_id=..., sharded_map=0x1c0) at /data/jenkins/workspace/impala-cdh6.1.x-exhaustive/repos/Impala/be/src/util/sharded-query-map-util.h:99
{noformat}

It seems that the webserver starts accepting requests before the ImpalaServer registers itself with the ExecEnv. Thus if a client requests a query status before that, the ExecEnv will still return a nullptr.

How did the client get that query ID to poll the server, though? The test gets them through "ImpaladService.get_in_flight_queries()" and no query should be able to register itself before the server has started up. In particular we set the Impala server in the ExecEnv during startup and before accepting queries.


> Timeout when starting test_query_expiration custom cluster
> ----------------------------------------------------------
>
>                 Key: IMPALA-6955
>                 URL: https://issues.apache.org/jira/browse/IMPALA-6955
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>    Affects Versions: Impala 3.1.0
>            Reporter: Vuk Ercegovac
>            Priority: Critical
>              Labels: broken-build, flaky
>
> Ran into the following crash on a rhel test recently:
> {noformat}
> Error starting cluster: num_known_live_backends did not reach expected value in time{noformat}
> Backtrace:
> {noformat}
> #0 0x00007f92365185c9 in raise () from /lib64/libc.so.6
> #1 0x00007f9236519cd8 in abort () from /lib64/libc.so.6
> #2 0x00007f92393841a5 in os::abort(bool) () from /opt/toolchain/sun-jdk-64bit-1.8.0.05/jre/lib/amd64/server/libjvm.so
> #3 0x00007f9239514843 in VMError::report_and_die() () from /opt/toolchain/sun-jdk-64bit-1.8.0.05/jre/lib/amd64/server/libjvm.so
> #4 0x00007f9239389562 in JVM_handle_linux_signal () from /opt/toolchain/sun-jdk-64bit-1.8.0.05/jre/lib/amd64/server/libjvm.so
> #5 0x00007f92393804f3 in signalHandler(int, siginfo*, void*) () from /opt/toolchain/sun-jdk-64bit-1.8.0.05/jre/lib/amd64/server/libjvm.so
> #6 <signal handler called>
> #7 0x00000000016fded0 in base::subtle::NoBarrier_CompareAndSwap (ptr=0x238, old_value=0, new_value=1) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/gutil/atomicops-internals-x86.h:85
> #8 0x00000000016fdf50 in base::subtle::Acquire_CompareAndSwap (ptr=0x238, old_value=0, new_value=1) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/gutil/atomicops-internals-x86.h:138
> #9 0x00000000016fe26c in base::SpinLock::Lock (this=0x238) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/gutil/spinlock.h:74
> #10 0x00000000016fe2f6 in impala::SpinLock::lock (this=0x238) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/util/spinlock.h:34
> #11 0x0000000001aa8c96 in impala::ScopedShardedMapRef<std::shared_ptr<impala::ClientRequestState> >::ScopedShardedMapRef (this=0x7f91aa81eb90, query_id=..., sharded_map=0x1c0) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/util/sharded-query-map-util.h:99
> #12 0x0000000001a999e2 in impala::ImpalaServer::GetClientRequestState (this=0xa569000, query_id=...) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/service/impala-server.cc:2123
> #13 0x0000000001b3ace6 in impala::ImpalaHttpHandler::QuerySummaryHandler (this=0x6f057a0, include_json_plan=true, include_summary=true, args=..., document=0x7f91aa81f230) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/service/impala-http-handler.cc:755
> #14 0x0000000001b3cc11 in impala::ImpalaHttpHandler::<lambda(const auto:5&, auto:6*)>::operator()<std::map<std::basic_string<char>, std::basic_string<char> >, rapidjson::GenericDocument<rapidjson::UTF8<> > >(const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > &, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator> > *) const (__closure=0xd9884b8, args=..., doc=0x7f91aa81f230) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/service/impala-http-handler.cc:132
> #15 0x0000000001b3cc46 in boost::detail::function::void_function_obj_invoker2<impala::ImpalaHttpHandler::RegisterHandlers(impala::Webserver*)::<lambda(const auto:5&, auto:6*)>, void, const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<const std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator> >*>::invoke(boost::detail::function::function_buffer &, const std::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::le\
> ss<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > &, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator> > *) (function_obj_ptr=..., a0=..., a1=0x7f91aa81f230) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:153
> #16 0x0000000001c4f528 in boost::function2<void, std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair<std::string const, std::string> > > const&, rapidjson::GenericDocument<rapidjson::UTF8<char>, rapidjson::MemoryPoolAllocator<rapidjson::CrtAllocator> >*>::operator() (this=0xd9884b0, a0=..., a1=0x7f91aa81f230) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/Impala-Toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
> #17 0x0000000001c4d877 in impala::Webserver::RenderUrlWithTemplate (this=0xc590b40, arguments=..., url_handler=..., output=0x7f91aa81f820, content_type=0x7f91aa81f99c) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/util/webserver.cc:440
> #18 0x0000000001c4d384 in impala::Webserver::BeginRequestCallback (this=0xc590b40, connection=0xc46c000, request_info=0xc46c000) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/util/webserver.cc:411
> #19 0x0000000001c4ce25 in impala::Webserver::BeginRequestCallbackStatic (connection=0xc46c000) at /data/jenkins/workspace/impala-cdh6.x-exhaustive-rhel7/repos/Impala/be/src/util/webserver.cc:370{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org