You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/01/10 21:19:56 UTC
[04/10] impala git commit: IMPALA-7666: Adding an opaque client
identifier to query options.
IMPALA-7666: Adding an opaque client identifier to query options.
We sometimes struggle to identify the client (e.g., a given version of a
JDBC driver, Tableau, Hue, etc.) for a given query. This commit adds a
User-Agent header style, called "Client Identifier", which clients can
set as a Query Option. Nothing is done with this header, but it's
written into logs and query profiles.
This commit includes changes to impala-shell to include the version of
impala shell with an associated test.
A future commit will serialize the name of the py.test being run into
this field, which is handy for figuring out where a query came from.
Change-Id: I0a7708492f05d33b2bc99fc3a03b461bbb6f3ea4
Reviewed-on: http://gerrit.cloudera.org:8080/12130
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a8d3b765
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a8d3b765
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a8d3b765
Branch: refs/heads/master
Commit: a8d3b765d88496bd51194cb1aa6edcd9459f68fa
Parents: 049e105
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Wed Dec 26 14:20:37 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Jan 10 03:11:35 2019 +0000
----------------------------------------------------------------------
be/src/service/query-options.cc | 4 ++++
be/src/service/query-options.h | 3 ++-
common/thrift/ImpalaInternalService.thrift | 3 +++
common/thrift/ImpalaService.thrift | 5 +++++
shell/impala_shell.py | 14 ++++++++++----
tests/shell/test_shell_commandline.py | 6 ++++++
6 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index bb49762..c3cf1e3 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -720,6 +720,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_topn_bytes_limit(topn_bytes_limit);
break;
}
+ case TImpalaQueryOptions::CLIENT_IDENTIFIER: {
+ query_options->__set_client_identifier(value);
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 95263f7..f001ba4 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -41,7 +41,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// the DCHECK.
#define QUERY_OPTS_TABLE\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
- TImpalaQueryOptions::TOPN_BYTES_LIMIT + 1);\
+ TImpalaQueryOptions::CLIENT_IDENTIFIER + 1);\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -144,6 +144,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(cpu_limit_s, CPU_LIMIT_S, TQueryOptionLevel::DEVELOPMENT)\
QUERY_OPT_FN(topn_bytes_limit, TOPN_BYTES_LIMIT, TQueryOptionLevel::ADVANCED)\
+ QUERY_OPT_FN(client_identifier, CLIENT_IDENTIFIER, TQueryOptionLevel::ADVANCED)\
;
/// Enforce practical limits on some query options to avoid undesired query state.
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 6add620..36b883d 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -308,6 +308,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
// The default value is set to 512MB based on empirical data
73: optional i64 topn_bytes_limit = 536870912;
+
+ // See comment in ImpalaService.thrift
+ 74: optional string client_identifier;
}
// Impala currently has two types of sessions: Beeswax and HiveServer2
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 2c15b39..2986cd9 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -346,6 +346,11 @@ enum TImpalaQueryOptions {
// operator which is capable of spilling to disk (unlike the TopN operator which keeps
// everything in memory). 0 or -1 means this has no effect.
TOPN_BYTES_LIMIT,
+
+ // An opaque string, not used by Impala itself, that can be used to identify
+ // the client, like a User-Agent in HTTP. Drivers should set this to
+ // their version number. May also be used by tests to help identify queries.
+ CLIENT_IDENTIFIER
}
// The summary of a DML statement.
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 20790b9..7471c2a 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -45,7 +45,7 @@ from shell_output import OverwritingStdErrOutputStream
from subprocess import call
VERSION_FORMAT = "Impala Shell v%(version)s (%(git_hash)s) built on %(build_date)s"
-VERSION_STRING = "build version not available"
+VERSION_STRING = "impala shell build version not available"
READLINE_UNAVAILABLE_ERROR = "The readline module was either not found or disabled. " \
"Command history will not be collected."
@@ -791,12 +791,20 @@ class ImpalaShell(object, cmd.Cmd):
# Use a temporary to avoid changing set_query_options during iteration.
new_query_options = {}
+ default_query_option_keys = set(self.imp_client.default_query_options)
for set_option, value in self.set_query_options.iteritems():
- if set_option not in set(self.imp_client.default_query_options):
+ if set_option not in default_query_option_keys:
print ('%s is not supported for the impalad being '
'connected to, ignoring.' % set_option)
else:
new_query_options[set_option] = value
+
+ if "CLIENT_IDENTIFIER" not in new_query_options \
+ and "CLIENT_IDENTIFIER" in default_query_option_keys:
+ # Programmatically set default CLIENT_IDENTIFIER to our version string,
+ # if the Impala version supports this option.
+ new_query_options["CLIENT_IDENTIFIER"] = VERSION_STRING
+
self.set_query_options = new_query_options
def _connect(self):
@@ -1618,8 +1626,6 @@ if __name__ == "__main__":
print_to_stderr('%s not found.\n' % user_config)
sys.exit(1)
- query_options = {}
-
# default shell options loaded in from impala_shell_config_defaults.py
# options defaults overwritten by those in config file
try:
http://git-wip-us.apache.org/repos/asf/impala/blob/a8d3b765/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 72b2100..66c28e9 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -793,3 +793,9 @@ class TestImpalaShell(ImpalaTestSuite):
test_port = s.getsockname()[1]
args = '-q "select foo; select bar;" --ssl -t 2000 -i localhost:%d' % (test_port)
run_impala_shell_cmd(args, expect_success=False)
+
+ def test_client_identifier(self):
+ """Confirms that a version string is passed along."""
+ args = "--query 'select 0; profile'"
+ result = run_impala_shell_cmd(args)
+ assert 'client_identifier=impala shell' in result.stdout.lower()