You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2023/02/10 16:18:10 UTC

[impala] 02/02: IMPALA-11850 Adds HTTP tracing headers when using the hs2-http protocol.

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e17fd9a0d5428306dfa41a041a44c800824d72f6
Author: jasonmfehr <jf...@cloudera.com>
AuthorDate: Wed Dec 14 00:37:12 2022 +0000

    IMPALA-11850 Adds HTTP tracing headers when using the hs2-http protocol.
    
    When using the hs2 protocol with the http transport, include several
    tracing http headers by default.  These headers are:
    
      * X-Request-Id        -- client-defined string that identifies the
                               http request; this string is meaningful only
                               to the client
      * X-Impala-Session-Id -- session id generated by the Impala backend,
                               will be omitted on http calls that occur
                               before this id has been generated
      * X-Impala-Query-Id   -- query id generated by the Impala backend,
                               will be omitted on http calls that occur
                               before this id has been generated
    
    The Impala shell includes these headers by default.  The command
    line argument --no_http_tracing has been added to remove these
    headers.
    
    The Impala backend logs these headers when they are present on the
    http request.  The log messages are written at log level 2 (RPC).
    
    Testing:
      - manual testing (verified using debugging proxy and impala logs)
      - new python test
    
    Change-Id: I7857eb5ec03eba32e06ec8d4133480f2e958ad2f
    Reviewed-on: http://gerrit.cloudera.org:8080/19428
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/transport/THttpServer.cpp               |  22 ++++
 be/src/transport/THttpServer.h                 |  17 +++
 shell/ImpalaHttpClient.py                      |  29 +++++
 shell/impala_client.py                         |  39 +++++-
 shell/impala_shell.py                          |  15 ++-
 shell/impala_shell_config_defaults.py          |   3 +-
 shell/option_parser.py                         |   5 +
 tests/common/test_dimensions.py                |   4 +
 tests/custom_cluster/test_shell_commandline.py | 169 +++++++++++++++++++++++++
 9 files changed, 295 insertions(+), 8 deletions(-)

diff --git a/be/src/transport/THttpServer.cpp b/be/src/transport/THttpServer.cpp
index 8189d16c2..8498ac1f2 100644
--- a/be/src/transport/THttpServer.cpp
+++ b/be/src/transport/THttpServer.cpp
@@ -33,6 +33,7 @@
 
 #include "gen-cpp/Frontend_types.h"
 #include "util/metrics.h"
+#include "common/logging.h"
 
 DECLARE_bool(trusted_domain_use_xff_header);
 DECLARE_bool(saml2_ee_test_mode);
@@ -125,6 +126,10 @@ THttpServer::~THttpServer() {
   #define THRIFT_strcasestr(haystack, needle) strcasestr(haystack, needle)
 #endif
 
+const std::string THttpServer::HEADER_REQUEST_ID = "X-Request-Id";
+const std::string THttpServer::HEADER_IMPALA_SESSION_ID = "X-Impala-Session-Id";
+const std::string THttpServer::HEADER_IMPALA_QUERY_ID = "X-Impala-Query-Id";
+
 void THttpServer::parseHeader(char* header) {
   char* colon = strchr(header, ':');
   if (colon == NULL) {
@@ -169,6 +174,15 @@ void THttpServer::parseHeader(char* header) {
   } else if (check_trusted_auth_header_
       && THRIFT_strncasecmp(header, FLAGS_trusted_auth_header.c_str(), sz) == 0) {
     found_trusted_auth_header_ = true;
+  } else if (THRIFT_strncasecmp(header, HEADER_REQUEST_ID.c_str(), sz) == 0) {
+    header_x_request_id_ = string(value);
+    StripWhiteSpace(&header_x_request_id_);
+  } else if (THRIFT_strncasecmp(header, HEADER_IMPALA_SESSION_ID.c_str(), sz) == 0) {
+    header_x_session_id_ = string(value);
+    StripWhiteSpace(&header_x_session_id_);
+  } else if (THRIFT_strncasecmp(header, HEADER_IMPALA_QUERY_ID.c_str(), sz) == 0) {
+    header_x_query_id_ = string(value);
+    StripWhiteSpace(&header_x_query_id_);
   }
 }
 
@@ -229,6 +243,14 @@ bool THttpServer::parseStatusLine(char* status) {
 }
 
 void THttpServer::headersDone() {
+  if (!header_x_request_id_.empty() || !header_x_session_id_.empty() ||
+      !header_x_query_id_.empty()) {
+    VLOG_RPC << "HTTP Connection Tracing Headers"
+        << (header_x_request_id_.empty() ? "" : " x-request-id=" + header_x_request_id_)
+        << (header_x_session_id_.empty() ? "" : " x-session-id=" + header_x_session_id_)
+        << (header_x_query_id_.empty() ? "" : " x-query-id=" + header_x_query_id_);
+  }
+
   if (!has_ldap_ && !has_kerberos_ && !has_saml_ && !has_jwt_) {
     // We don't need to authenticate.
     resetAuthState();
diff --git a/be/src/transport/THttpServer.h b/be/src/transport/THttpServer.h
index f37620db3..b623e6491 100644
--- a/be/src/transport/THttpServer.h
+++ b/be/src/transport/THttpServer.h
@@ -152,6 +152,14 @@ public:
   void setCallbacks(const HttpCallbacks& callbacks) { callbacks_ = callbacks; }
 
 protected:
+  // Names of HTTP headers that are meaningful.
+  // Client-defined string identifying the HTTP request, meaningful only to the client.
+  static const std::string HEADER_REQUEST_ID;
+  // Impala session id specified by the Impala backend.  Used for tracing HTTP requests.
+  static const std::string HEADER_IMPALA_SESSION_ID;
+  // Impala query id specified by the Impala backend.  Used for tracing HTTP requests.
+  static const std::string HEADER_IMPALA_QUERY_ID;
+
   void readHeaders();
   virtual void parseHeader(char* header);
   virtual bool parseStatusLine(char* status);
@@ -223,6 +231,15 @@ protected:
   // Used to collect all information about the http request. Can be passed to the
   // Frontend. Currently only used by SAML SSO.
   impala::TWrappedHttpRequest* wrapped_request_ = nullptr;
+
+  // The value from the 'X-Request-Id' header.
+  std::string header_x_request_id_ = "";
+
+  // The value from the 'X-Impala-Session-Id' header.
+  std::string header_x_session_id_ = "";
+
+  // The value from the 'X-Impala-Query-Id' header.
+  std::string header_x_query_id_ = "";
 };
 
 /**
diff --git a/shell/ImpalaHttpClient.py b/shell/ImpalaHttpClient.py
index 5d7de079c..88cc0f054 100644
--- a/shell/ImpalaHttpClient.py
+++ b/shell/ImpalaHttpClient.py
@@ -133,6 +133,7 @@ class ImpalaHttpClient(TTransportBase):
     self.__get_custom_headers_func = None
     self.__basic_auth = None
     self.__kerb_service = None
+    self.__add_custom_headers_funcs = []
 
   @staticmethod
   def basic_proxy_auth_header(proxy):
@@ -227,13 +228,41 @@ class ImpalaHttpClient(TTransportBase):
     # auth mechanism: None
     self.__get_custom_headers_func = self.getCustomHeadersWithoutAuth
 
+  # Whenever http(s) calls are made to the backend impala, each function
+  # added through this method will be called.  Thus, arbitrary custom
+  # headers can be set on each request.
+  # parameters:
+  #  funcs - tuple of functions where each takes no arguments and returns
+  #      a dict of http headers
+  # Note:  if the custom function returns a http header with a name that
+  # does not start with "X-" or "x-", it will cause an error to be thrown
+  def addCustomHeaderFunc(self, *funcs):
+    if funcs is None:
+      return
+
+    for f in funcs:
+      self.__add_custom_headers_funcs.append(f)
+
   # Update HTTP headers based on the saved cookies and auth mechanism.
   def refreshCustomHeaders(self):
+    self.__custom_headers = {}
+
     if self.__get_custom_headers_func:
       cookie_header, has_auth_cookie = self.getHttpCookieHeaderForRequest()
       self.__custom_headers = \
           self.__get_custom_headers_func(cookie_header, has_auth_cookie)
 
+    for f in self.__add_custom_headers_funcs:
+      headers = f()
+      if headers is not None:
+        for key in headers:
+          assert key[0:2].lower() == "x-", \
+            "header '{0}' is not valid, all custom headers must start with "\
+            "'X-' or 'x-'".format(key)
+          assert key not in self.__custom_headers, \
+            "header '{0}' already exists in custom headers dictionary".format(key)
+          self.__custom_headers[key] = headers[key]
+
   # Return first value as a cookie list for Cookie header. It's a list of name-value
   # pairs in the form of <cookie-name>=<cookie-value>. Pairs in the list are separated by
   # a semicolon and a space ('; ').
diff --git a/shell/impala_client.py b/shell/impala_client.py
index e2a058a47..af6739cd0 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -30,6 +30,7 @@ import ssl
 import sys
 import time
 from datetime import datetime
+import uuid
 
 from beeswaxd import BeeswaxService
 from beeswaxd.BeeswaxService import QueryState
@@ -133,7 +134,7 @@ class ImpalaClient(object):
                ldap_password=None, use_ldap=False, client_connect_timeout_ms=60000,
                verbose=True, use_http_base_transport=False, http_path=None,
                http_cookie_names=None, http_socket_timeout_s=None, value_converter=None,
-               connect_max_tries=4, rpc_stdout=False, rpc_file=None):
+               connect_max_tries=4, rpc_stdout=False, rpc_file=None, http_tracing=True):
     self.connected = False
     self.impalad_host = impalad[0]
     self.impalad_port = int(impalad[1])
@@ -155,6 +156,7 @@ class ImpalaClient(object):
     self.use_http_base_transport = use_http_base_transport
     self.http_path = http_path
     self.http_cookie_names = http_cookie_names
+    self.http_tracing = http_tracing
     # This is set from ImpalaShell's signal handler when a query is cancelled
     # from command line via CTRL+C. It is used to suppress error messages of
     # query cancellation.
@@ -429,6 +431,8 @@ class ImpalaClient(object):
     else:
       transport.setNoneAuth()
 
+    transport.addCustomHeaderFunc(self.get_custom_http_headers)
+
     # Without buffering Thrift would call socket.recv() each time it deserializes
     # something (e.g. a member in a struct).
     transport = TBufferedTransport(transport)
@@ -643,6 +647,12 @@ class ImpalaClient(object):
     if not self.connected:
       raise DisconnectedException("Not connected (use CONNECT to establish a connection)")
 
+  def get_custom_http_headers(self):
+    # When the transport is http, subclasses can override this function
+    # to add arbitrary http headers.
+    return None
+
+
 class ImpalaHS2Client(ImpalaClient):
   """Impala client. Uses the HS2 protocol plus Impala-specific extensions."""
   def __init__(self, *args, **kwargs):
@@ -672,6 +682,9 @@ class ImpalaHS2Client(ImpalaClient):
     if self.rpc_stdout or self.rpc_stdout is not None:
       self.thrift_printer = ThriftPrettyPrinter()
 
+    self._base_request_id = str(uuid.uuid1())
+    self._request_num = 0
+
   def _get_thrift_client(self, protocol):
     return ImpalaHiveServer2Service.Client(protocol)
 
@@ -697,6 +710,25 @@ class ImpalaHS2Client(ImpalaClient):
 
     self._populate_query_options()
 
+  def get_custom_http_headers(self):
+    headers = {}
+
+    if self.http_tracing:
+      session_id = self.get_session_id()
+      if session_id is not None:
+        headers["X-Impala-Session-Id"] = session_id
+
+      current_query_id = self.get_query_id_str(self._current_query_handle)
+      if current_query_id is not None:
+        headers["X-Impala-Query-Id"] = current_query_id
+
+      assert getattr(self, "_current_request_id", None) is not None, \
+        "request id was not set"
+      headers["X-Request-Id"] = self._current_request_id
+
+    return headers
+
+
   def close_connection(self):
     if self.session_handle is not None:
       # Attempt to close session explicitly. Do not fail if there is an error
@@ -813,7 +845,6 @@ class ImpalaHS2Client(ImpalaClient):
         # Attach the schema to the handle for convenience.
         handle.schema = resp.schema
       handle.is_closed = False
-      self._clear_current_query_handle()
       return handle
     finally:
       self._clear_current_query_handle()
@@ -1075,6 +1106,10 @@ class ImpalaHS2Client(ImpalaClient):
     If 'retry_on_error' is true, the rpc is retried if an exception is raised. The maximum
     number of tries is determined by 'self.max_tries'. Retries, if enabled, are attempted
     for all exceptions other than TApplicationException."""
+
+    self._request_num += 1
+    self._current_request_id = "{0}-{1}".format(self._base_request_id, self._request_num)
+
     self._check_connected()
     num_tries = 1
     max_tries = num_tries
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 5101a065e..066dcb0ad 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -263,6 +263,7 @@ class ImpalaShell(cmd.Cmd, object):
     self.http_path = options.http_path
     self.fetch_size = options.fetch_size
     self.http_cookie_names = options.http_cookie_names
+    self.http_tracing = not options.no_http_tracing
 
     # Due to a readline bug in centos/rhel7, importing it causes control characters to be
     # printed. This breaks any scripting against the shell in non-interactive mode. Since
@@ -610,7 +611,8 @@ class ImpalaShell(cmd.Cmd, object):
                           self.client_connect_timeout_ms, self.verbose,
                           use_http_base_transport=False, http_path=self.http_path,
                           http_cookie_names=None, value_converter=value_converter,
-                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file)
+                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file,
+                          http_tracing=self.http_tracing)
       elif protocol == 'hs2-http':
         return StrictHS2Client(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
                           self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -619,7 +621,7 @@ class ImpalaShell(cmd.Cmd, object):
                           use_http_base_transport=True, http_path=self.http_path,
                           http_cookie_names=self.http_cookie_names,
                           value_converter=value_converter, rpc_stdout=self.rpc_stdout,
-                          rpc_file=self.rpc_file)
+                          rpc_file=self.rpc_file, http_tracing=self.http_tracing)
     if protocol == 'hs2':
       return ImpalaHS2Client(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
                           self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -627,7 +629,8 @@ class ImpalaShell(cmd.Cmd, object):
                           self.client_connect_timeout_ms, self.verbose,
                           use_http_base_transport=False, http_path=self.http_path,
                           http_cookie_names=None, value_converter=value_converter,
-                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file)
+                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file,
+                          http_tracing=self.http_tracing)
     elif protocol == 'hs2-http':
       return ImpalaHS2Client(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
                           self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -638,7 +641,8 @@ class ImpalaShell(cmd.Cmd, object):
                           http_socket_timeout_s=self.http_socket_timeout_s,
                           value_converter=value_converter,
                           connect_max_tries=self.connect_max_tries,
-                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file)
+                          rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file,
+                          http_tracing=self.http_tracing)
     elif protocol == 'beeswax':
       return ImpalaBeeswaxClient(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
                           self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -1346,7 +1350,8 @@ class ImpalaShell(cmd.Cmd, object):
         num_rows = 0
 
         for rows in rows_fetched:
-          # IMPALA-4418: Break out of the loop to prevent printing an unnecessary empty line.
+          # IMPALA-4418: Break out of the loop to prevent printing an unnecessary
+          # empty line.
           if len(rows) == 0:
             continue
           self.output_stream.write(rows)
diff --git a/shell/impala_shell_config_defaults.py b/shell/impala_shell_config_defaults.py
index 59f6cd043..b07cd86a6 100644
--- a/shell/impala_shell_config_defaults.py
+++ b/shell/impala_shell_config_defaults.py
@@ -60,5 +60,6 @@ impala_shell_defaults = {
             'http_socket_timeout_s': None,
             'global_config_default_path': '/etc/impalarc',
             'strict_hs2_protocol': False,
-            'hs2_fp_format': None
+            'hs2_fp_format': None,
+            'no_http_tracing': False
     }
diff --git a/shell/option_parser.py b/shell/option_parser.py
index 8d28a7404..b385be854 100755
--- a/shell/option_parser.py
+++ b/shell/option_parser.py
@@ -333,6 +333,11 @@ def get_option_parser(defaults):
                     "returned in an http response by the server or an intermediate proxy "
                     "then it will be included in each subsequent request for the same "
                     "connection.")
+  parser.add_option("--no_http_tracing", dest="no_http_tracing",
+                    action="store_true",
+                    help="Tracing http headers 'X-Request-Id', 'X-Impala-Session-Id', "
+                    "and 'X-Impala-Query-Id' will not be added to each http request "
+                    "(hs2-http protocol only).")
   parser.add_option("--hs2_fp_format", type="str",
                     dest="hs2_fp_format", default=None,
                     help="Sets the printing format specification for floating point "
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index e551d572b..7c4c3b2b5 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -137,6 +137,10 @@ def create_client_protocol_dimension():
   return ImpalaTestDimension('protocol', 'beeswax', 'hs2', 'hs2-http')
 
 
+def create_client_protocol_http_transport():
+  return ImpalaTestDimension('protocol', 'hs2-http')
+
+
 def create_client_protocol_strict_dimension():
   # only support strict dimensions if the file system is HDFS, since that is
   # where the hive cluster is run.
diff --git a/tests/custom_cluster/test_shell_commandline.py b/tests/custom_cluster/test_shell_commandline.py
new file mode 100644
index 000000000..c7379a93d
--- /dev/null
+++ b/tests/custom_cluster/test_shell_commandline.py
@@ -0,0 +1,169 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import pytest
+import re
+import tempfile
+
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.test_dimensions import create_client_protocol_http_transport
+from time import sleep
+from tests.shell.util import run_impala_shell_cmd
+
+
+class TestImpalaShellCommandLine(CustomClusterTestSuite):
+  """Runs tests of the Impala shell by first standing up an Impala cluster with
+  specific startup flags.  Then, the Impala shell is launched with specific arguments
+  in a separate process.  Assertions are done by scanning the shell output and Impala
+  server logs for expected strings."""
+
+  LOG_DIR_HTTP_TRACING = tempfile.mkdtemp(prefix="http_tracing")
+  LOG_DIR_HTTP_TRACING_OFF = tempfile.mkdtemp(prefix="http_tracing_off")
+  IMPALA_ID_RE = "([0-9a-f]{16}:[0-9a-f]{16})"
+
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args("-log_dir={0} -v 2".format(LOG_DIR_HTTP_TRACING))
+  def test_http_tracing_headers(self, vector):
+    """Asserts that tracing headers are automatically added by the impala shell to
+    all calls to the backend impala engine made using the hs2 over http protocol.
+    The impala coordinator logs are searched to ensure these tracing headers were added
+    and also were passed through to the coordinator."""
+    args = ['--protocol', 'hs2-http', '-q', 'select version();profile']
+    result = run_impala_shell_cmd(vector, args)
+
+    # Shut down cluster to ensure logs flush to disk.
+    sleep(5)
+    self._stop_impala_cluster()
+
+    # Ensure the query ran successfully.
+    assert result.stdout.find("version()") > -1
+    assert result.stdout.find("impalad version") > -1
+    assert result.stdout.find("Query Runtime Profile") > -1
+
+    request_id_base = ""
+    request_id_serialnum = 0
+    session_id = ""
+    query_id = ""
+    last_known_query_id = ""
+    tracing_lines_count = 0
+
+    request_id_re = re.compile("x-request-id=([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
+                               "[0-9a-f]{4}-[0-9a-f]{12})-(\\d+)")
+    session_id_re = re.compile("x-session-id={0}"
+                               .format(TestImpalaShellCommandLine.IMPALA_ID_RE))
+    query_id_re = re.compile("x-query-id={0}"
+                               .format(TestImpalaShellCommandLine.IMPALA_ID_RE))
+    profile_query_id_re = re.compile("Query \\(id={0}\\)"
+                               .format(TestImpalaShellCommandLine.IMPALA_ID_RE))
+
+    # Find all HTTP Connection Tracing log lines.
+    with open(os.path.join(self.LOG_DIR_HTTP_TRACING, "impalad.INFO")) as log_file:
+      for line in log_file:
+        if line.find("HTTP Connection Tracing Headers") > -1:
+          tracing_lines_count += 1
+
+          # The impala shell builds a request_id that consists of the same randomly
+          # generated uuid and a serially increasing integer appended on the end.
+          # Ensure both these conditions are met.
+          m = request_id_re.search(line)
+          assert m is not None, \
+            "did not find request id in HTTP connection tracing log line '{0}'" \
+            .format(line)
+
+          if request_id_base == "":
+            # The current line is the very first HTTP connection tracing line in the logs.
+            request_id_base = m.group(1)
+          else:
+            assert request_id_base == m.group(1), \
+              "base request id expected '{0}', actual '{1}'" \
+              .format(request_id_base, m.group(1))
+
+          request_id_serialnum += 1
+          assert request_id_serialnum == int(m.group(2)), \
+            "request id serial number expected '{0}', actual '{1}'" \
+            .format(request_id_serialnum, m.group(2))
+
+          # The session_id is generated by impala and must be the same once it
+          # appears in a tracing log line.
+          m = session_id_re.search(line)
+          if m is not None:
+            if session_id == "":
+              session_id = m.group(1)
+            else:
+              assert session_id == m.group(1), \
+                "session id expected '{0}', actual '{1}'".format(session_id, m.group(1))
+
+          # The query_id is generated by impala and must be the same for the
+          # duration of the query.
+          m = query_id_re.search(line)
+          if m is None:
+            query_id = ""
+          else:
+            if query_id == "":
+              query_id = m.group(1)
+              last_known_query_id = query_id
+            else:
+              assert query_id == m.group(1), \
+                "query id expected '{0}', actual '{1}'".format(query_id, m.group(1))
+
+    # Assert that multiple HTTP connection tracing log lines were found.
+    assert tracing_lines_count > 10, \
+      "did not find enough HTTP connection tracing log lines, found {0} lines" \
+      .format(tracing_lines_count)
+
+    # Ensure the last found query id matches the actual query id
+    # from the impala query profile.
+    m = profile_query_id_re.search(result.stdout)
+    if m is not None:
+      assert last_known_query_id == m.group(1), \
+        "impala query profile id, expected '{0}', actual '{1}'" \
+        .format(last_known_query_id, m.group(1))
+    else:
+      pytest.fail("did not find Impala query id in shell stdout")
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args("-log_dir={0} -v 2".format(LOG_DIR_HTTP_TRACING_OFF))
+  def test_http_tracing_headers_off(self, vector):
+    """Asserts the impala shell command line parameter to prevent the addition of http
+    tracing headers actually leaves out those tracing headers."""
+    args = ['--protocol', 'hs2-http', '--no_http_tracing',
+            '-q', 'select version();profile']
+    result = run_impala_shell_cmd(vector, args)
+
+    # Shut down cluster to ensure logs flush to disk.
+    sleep(5)
+    self._stop_impala_cluster()
+
+    # Ensure the query ran successfully.
+    assert result.stdout.find("version()") > -1
+    assert result.stdout.find("impalad version") > -1
+    assert result.stdout.find("Query Runtime Profile") > -1
+
+    # Find all HTTP Connection Tracing log lines (there should not be any).
+    with open(os.path.join(self.LOG_DIR_HTTP_TRACING_OFF, "impalad.INFO")) as log_file:
+      for line in log_file:
+        if line.find("HTTP Connection Tracing Headers") != -1:
+          pytest.fail("found HTTP connection tracing line line: {0}".format(line))