You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/01/10 21:19:56 UTC

[04/10] impala git commit: IMPALA-7666: Adding an opaque client identifier to query options.

IMPALA-7666: Adding an opaque client identifier to query options.

We sometimes struggle to identify the client (e.g., a given version of a
JDBC driver, Tableau, Hue, etc.) for a given query. This commit adds a
User-Agent header style, called "Client Identifier", which clients can
set as a Query Option. Nothing is done with this header, but it's
written into logs and query profiles.

This commit includes changes to impala-shell to include the version of
impala shell with an associated test.

A future commit will serialize the name of the py.test being run into
this field, which is handy for figuring out where a query came from.

Change-Id: I0a7708492f05d33b2bc99fc3a03b461bbb6f3ea4
Reviewed-on: http://gerrit.cloudera.org:8080/12130
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a8d3b765
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a8d3b765
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a8d3b765

Branch: refs/heads/master
Commit: a8d3b765d88496bd51194cb1aa6edcd9459f68fa
Parents: 049e105
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Wed Dec 26 14:20:37 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Jan 10 03:11:35 2019 +0000

----------------------------------------------------------------------
 be/src/service/query-options.cc            |  4 ++++
 be/src/service/query-options.h             |  3 ++-
 common/thrift/ImpalaInternalService.thrift |  3 +++
 common/thrift/ImpalaService.thrift         |  5 +++++
 shell/impala_shell.py                      | 14 ++++++++++----
 tests/shell/test_shell_commandline.py      |  6 ++++++
 6 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index bb49762..c3cf1e3 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -720,6 +720,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
         query_options->__set_topn_bytes_limit(topn_bytes_limit);
         break;
       }
+      case TImpalaQueryOptions::CLIENT_IDENTIFIER: {
+        query_options->__set_client_identifier(value);
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";

http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 95263f7..f001ba4 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -41,7 +41,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // the DCHECK.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::TOPN_BYTES_LIMIT + 1);\
+      TImpalaQueryOptions::CLIENT_IDENTIFIER + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -144,6 +144,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
       TQueryOptionLevel::ADVANCED)\
   QUERY_OPT_FN(cpu_limit_s, CPU_LIMIT_S, TQueryOptionLevel::DEVELOPMENT)\
   QUERY_OPT_FN(topn_bytes_limit, TOPN_BYTES_LIMIT, TQueryOptionLevel::ADVANCED)\
+  QUERY_OPT_FN(client_identifier, CLIENT_IDENTIFIER, TQueryOptionLevel::ADVANCED)\
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query state.

http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 6add620..36b883d 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -308,6 +308,9 @@ struct TQueryOptions {
   // See comment in ImpalaService.thrift
   // The default value is set to 512MB based on empirical data
   73: optional i64 topn_bytes_limit = 536870912;
+
+  // See comment in ImpalaService.thrift
+  74: optional string client_identifier;
 }
 
 // Impala currently has two types of sessions: Beeswax and HiveServer2

http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 2c15b39..2986cd9 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -346,6 +346,11 @@ enum TImpalaQueryOptions {
   // operator which is capable of spilling to disk (unlike the TopN operator which keeps
   // everything in memory). 0 or -1 means this has no effect.
   TOPN_BYTES_LIMIT,
+
+  // An opaque string, not used by Impala itself, that can be used to identify
+  // the client, like a User-Agent in HTTP. Drivers should set this to
+  // their version number. May also be used by tests to help identify queries.
+  CLIENT_IDENTIFIER
 }
 
 // The summary of a DML statement.

http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 20790b9..7471c2a 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -45,7 +45,7 @@ from shell_output import OverwritingStdErrOutputStream
 from subprocess import call
 
 VERSION_FORMAT = "Impala Shell v%(version)s (%(git_hash)s) built on %(build_date)s"
-VERSION_STRING = "build version not available"
+VERSION_STRING = "impala shell build version not available"
 READLINE_UNAVAILABLE_ERROR = "The readline module was either not found or disabled. " \
                              "Command history will not be collected."
 
@@ -791,12 +791,20 @@ class ImpalaShell(object, cmd.Cmd):
 
     # Use a temporary to avoid changing set_query_options during iteration.
     new_query_options = {}
+    default_query_option_keys = set(self.imp_client.default_query_options)
     for set_option, value in self.set_query_options.iteritems():
-      if set_option not in set(self.imp_client.default_query_options):
+      if set_option not in default_query_option_keys:
         print ('%s is not supported for the impalad being '
                'connected to, ignoring.' % set_option)
       else:
         new_query_options[set_option] = value
+
+    if "CLIENT_IDENTIFIER" not in new_query_options \
+        and "CLIENT_IDENTIFIER" in default_query_option_keys:
+      # Programmatically set default CLIENT_IDENTIFIER to our version string,
+      # if the Impala version supports this option.
+      new_query_options["CLIENT_IDENTIFIER"] = VERSION_STRING
+
     self.set_query_options = new_query_options
 
   def _connect(self):
@@ -1618,8 +1626,6 @@ if __name__ == "__main__":
     print_to_stderr('%s not found.\n' % user_config)
     sys.exit(1)
 
-  query_options = {}
-
   # default shell options loaded in from impala_shell_config_defaults.py
   # options defaults overwritten by those in config file
   try:

http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 72b2100..66c28e9 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -793,3 +793,9 @@ class TestImpalaShell(ImpalaTestSuite):
       test_port = s.getsockname()[1]
       args = '-q "select foo; select bar;" --ssl -t 2000 -i localhost:%d' % (test_port)
       run_impala_shell_cmd(args, expect_success=False)
+
+  def test_client_identifier(self):
+    """Confirms that a version string is passed along."""
+    args = "--query 'select 0; profile'"
+    result = run_impala_shell_cmd(args)
+    assert 'client_identifier=impala shell' in result.stdout.lower()