You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/03/22 14:25:31 UTC

[impala] 01/02: IMPALA-10552: Support external frontends supplying timeline for profile

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 879986ea6f478cfa35f78249d25441fc6d15cc3d
Author: John Sherman <jf...@cloudera.com>
AuthorDate: Mon Jul 20 20:27:02 2020 +0000

    IMPALA-10552: Support external frontends supplying timeline for profile
    
    - Add EXTERNAL_FRONTEND as a client session type
    - Use EXTERNAL_FRONTEND session type for clients connected to
      external frontend interface.
    - Rename Query Timeline to Impala Backend Timeline for external
      frontends
      - the query timeline is no longer an end to end timeline when
        executing a plan from an external frontend
    - External frontends can provide timeline information through a
      TExecRequest by filling in the timeline field with a valid
      TEventSequence
    - The frontend timeline and backend timeline are completely separate
      entities, so no attempt is made to capture the overall end-to-end
      timing
      - This is due to the fact that the frontend and Impala may not share
        the same time source (or even machine).
      - It is safe to add the backend and frontend timeline totals together
        to get a rough idea of how long the query took to execute end to
        end, but keep in mind that this number captures neither the time it
        took the frontend to send the plan to the backend (Impala) nor the
        time it took the end user to read the results.
    
    Example timeline with external frontend:
      Frontend Timeline: 3s016ms
         - Analysis finished: 1s130ms (1s130ms)
         - Calcite plan generated: 2s170ms (1s040ms)
         - Metadata load started: 2s245ms (74.486ms)
         - Metadata load finished. loaded-tables=1: 2s654ms (409.847ms)
         - Single node plan created: 2s726ms (71.659ms)
         - Runtime filters computed: 2s756ms (30.000ms)
         - Distributed plan created: 2s761ms (5.265ms)
         - Execution request created: 2s890ms (128.387ms)
         - Impala plan generated: 2s891ms (1.508ms)
         - Planning finished: 2s893ms (1.894ms)
         - Submitted query: 3s016ms (122.377ms)
      Impala Backend Timeline: 79.998ms
         - Query submitted: 0.000ns (0.000ns)
         - Submit for admission: 0.000ns (0.000ns)
         - Completed admission: 0.000ns (0.000ns)
         - Ready to start on 1 backends: 3.999ms (3.999ms)
         - All 1 execution backends (2 fragment instances) started: 7.999ms (3.999ms)
         - Rows available: 55.999ms (47.999ms)
         - Execution cancelled: 79.998ms (23.999ms)
         - Released admission control resources: 79.998ms (0.000ns)
         - Unregister query: 79.998ms (0.000ns)
    
    Testing done:
    - Manual inspection of profiles on the Impala web UI
    - test_hs2.py
    - test_tpch_queries.py
    - test_tpcds_queries.py::TestTpcdsDecimalV2Query
    
    Co-authored-by: Kurt Deschler <kd...@cloudera.com>
    
    Change-Id: I2b3692b4118ea23c0f9f8ec4bcc27b0b68bb32ec
    Reviewed-on: http://gerrit.cloudera.org:8080/17183
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/client-request-state.cc | 11 +++++++++--
 be/src/service/impala-hs2-server.cc    | 10 +++++++---
 be/src/service/impala-server.cc        | 16 ++++++++++------
 common/thrift/Query.thrift             |  5 ++++-
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index 23b446f..e5a31cf 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -130,14 +130,21 @@ ClientRequestState::ClientRequestState(const TQueryCtx& query_ctx, Frontend* fro
   num_rows_fetched_from_cache_counter_ =
       ADD_COUNTER(server_profile_, "NumRowsFetchedFromCache", TUnit::UNIT);
   client_wait_timer_ = ADD_TIMER(server_profile_, "ClientFetchWaitTimer");
-  query_events_ = summary_profile_->AddEventSequence("Query Timeline");
+  bool is_external_fe = session_type() == TSessionType::EXTERNAL_FRONTEND;
+  // "Impala Backend Timeline" was specifically chosen to exploit the lexicographical
+  // ordering defined by the underlying std::map holding the timelines displayed in
+  // the web UI. This helps ensure that "Frontend Timeline" is displayed before
+  // "Impala Backend Timeline".
+  query_events_ = summary_profile_->AddEventSequence(
+      is_external_fe ? "Impala Backend Timeline" : "Query Timeline");
   query_events_->Start();
   profile_->AddChild(summary_profile_);
 
   profile_->set_name("Query (id=" + PrintId(query_id()) + ")");
   summary_profile_->AddInfoString("Session ID", PrintId(session_id()));
   summary_profile_->AddInfoString("Session Type", PrintThriftEnum(session_type()));
-  if (session_type() == TSessionType::HIVESERVER2) {
+  if (session_type() == TSessionType::HIVESERVER2 ||
+      session_type() == TSessionType::EXTERNAL_FRONTEND) {
     summary_profile_->AddInfoString("HiveServer2 Protocol Version",
         Substitute("V$0", 1 + session->hs2_version));
   }
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index 2f6858b..15dd523 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -309,7 +309,13 @@ void ImpalaServer::OpenSession(TOpenSessionResp& return_val,
       std::make_shared<SessionState>(this, session_id, secret);
   state->closed = false;
   state->start_time_ms = UnixMillis();
-  state->session_type = TSessionType::HIVESERVER2;
+  const ThriftServer::ConnectionContext* connection_context =
+    ThriftServer::GetThreadConnectionContext();
+  if (connection_context->server_name == EXTERNAL_FRONTEND_SERVER_NAME) {
+    state->session_type = TSessionType::EXTERNAL_FRONTEND;
+  } else {
+    state->session_type = TSessionType::HIVESERVER2;
+  }
   state->network_address = ThriftServer::GetThreadConnectionContext()->network_address;
   state->last_accessed_ms = UnixMillis();
   // request.client_protocol is not guaranteed to be a valid TProtocolVersion::type, so
@@ -323,8 +329,6 @@ void ImpalaServer::OpenSession(TOpenSessionResp& return_val,
   state->kudu_latest_observed_ts = 0;
 
   // If the username was set by a lower-level transport, use it.
-  const ThriftServer::ConnectionContext* connection_context =
-      ThriftServer::GetThreadConnectionContext();
   if (!connection_context->username.empty()) {
     state->connected_user = connection_context->username;
     if (!connection_context->do_as_user.empty()) {
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index f9c50bd..b460d25 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -1188,14 +1188,14 @@ Status ImpalaServer::ExecuteInternal(const TQueryCtx& query_ctx,
     TUniqueId query_id = (*query_handle)->query_id();
     // Generate TExecRequest here if one was not passed in or we want one
     // from the Impala planner to compare with
-    if (external_exec_request == nullptr || !FLAGS_dump_exec_request_path.empty()) {
-      // Takes the TQueryCtx and calls into the frontend to initialize the
-      // TExecRequest for this query.
+    if (!is_external_req || !FLAGS_dump_exec_request_path.empty()) {
+      // Takes the TQueryCtx and calls into the frontend to initialize the TExecRequest
+      // for this query.
       RETURN_IF_ERROR(query_handle->query_driver()->RunFrontendPlanner(query_ctx));
       DumpTExecReq((*query_handle)->exec_request(), "internal", query_id);
     }
 
-    if (external_exec_request != nullptr) {
+    if (is_external_req) {
       // Use passed in exec_request
       RETURN_IF_ERROR(query_handle->query_driver()->SetExternalPlan(
           query_ctx, *external_exec_request));
@@ -1209,7 +1209,10 @@ Status ImpalaServer::ExecuteInternal(const TQueryCtx& query_ctx,
     }
 
     const TExecRequest& result = (*query_handle)->exec_request();
-    (*query_handle)->query_events()->MarkEvent("Planning finished");
+    // If this is an external request, planning was done by the external frontend
+    if (!is_external_req) {
+      (*query_handle)->query_events()->MarkEvent("Planning finished");
+    }
     (*query_handle)->set_user_profile_access(result.user_has_profile_access);
     (*query_handle)->summary_profile()->AddEventSequence(
         result.timeline.name, result.timeline);
@@ -2513,7 +2516,8 @@ void ImpalaServer::UnregisterSessionTimeout(int32_t session_timeout) {
                   >= FLAGS_disconnected_session_timeout * 1000L) {
             // This session has no active connections and is past the disconnected session
             // timeout, so close it.
-            DCHECK_ENUM_EQ(session_state->session_type, TSessionType::HIVESERVER2);
+            DCHECK(session_state->session_type == TSessionType::HIVESERVER2 ||
+                session_state->session_type == TSessionType::EXTERNAL_FRONTEND);
             LOG(INFO) << "Closing session: " << PrintId(session_id)
                       << ", user: " << session_state->connected_user
                       << ", because it no longer  has any open connections. The last "
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 31c0e13..ce0d654 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -480,10 +480,13 @@ struct TQueryOptions {
       PlanNodes.TMinmaxFilteringLevel.ROW_GROUP;
 }
 
-// Impala currently has two types of sessions: Beeswax and HiveServer2
+// Impala currently has three types of sessions: Beeswax, HiveServer2 and external
+// frontend. External frontend is a variation of HiveServer2 to support external
+// planning.
 enum TSessionType {
   BEESWAX = 0
   HIVESERVER2 = 1
+  EXTERNAL_FRONTEND = 2
 }
 
 // Client request including stmt to execute and query options.