You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2016/08/23 18:28:13 UTC

[4/4] incubator-impala git commit: IMPALA-3893, IMPALA-3901: impala-shell prints incorrect coordinator address, overly verbose

IMPALA-3893, IMPALA-3901: impala-shell prints incorrect coordinator address, overly verbose

The webserver address was always configured as 0.0.0.0 (meaning that
the webserver could be reached on any IP for that machine) unless
otherwise specified. This is not a correct value to dispay to the
user. This patch returns the hostname of the node, when requested,
if the webserver host address is 0.0.0.0.

This patch also does not print the coordinator link for very simple
queries, as it's not necessary and is unnecessarily verbose.

This patch also does away with pinging the impalad an extra time per
query for finding the host time and webserver address. It instead
remembers the webserver address at connect time and displays client
local time for every query instead.

Change-Id: I9d167b66f2dd8629e40a7094d21ea7ce6b43d23b
Reviewed-on: http://gerrit.cloudera.org:8080/3994
Tested-by: Internal Jenkins
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Sailesh Mukil <sa...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c23bf38a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c23bf38a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c23bf38a

Branch: refs/heads/master
Commit: c23bf38a201e400a98b2f8f34b5aec95fad27e2b
Parents: deaccbb
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Mon Aug 15 15:38:38 2016 -0700
Committer: Sailesh Mukil <sa...@cloudera.com>
Committed: Tue Aug 23 18:25:06 2016 +0000

----------------------------------------------------------------------
 be/src/service/impala-beeswax-server.cc |  1 -
 be/src/util/webserver.cc                |  8 ++++-
 common/thrift/ImpalaService.thrift      |  3 --
 shell/impala_client.py                  |  2 +-
 shell/impala_shell.py                   | 38 +++++++++++----------
 tests/shell/test_shell_commandline.py   | 49 +++++++++++++++++++++++-----
 6 files changed, 70 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index cdfa52f..3daa36b 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -502,7 +502,6 @@ void ImpalaServer::PingImpalaService(TPingImpalaServiceResp& return_val) {
   VLOG_RPC << "PingImpalaService()";
   return_val.version = GetVersionString(true);
   return_val.webserver_address = ExecEnv::GetInstance()->webserver()->Url();
-  return_val.epoch_time = reinterpret_cast<int64_t>(std::time(0));
   VLOG_RPC << "PingImpalaService(): return_val=" << ThriftDebugString(return_val);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/be/src/util/webserver.cc
----------------------------------------------------------------------
diff --git a/be/src/util/webserver.cc b/be/src/util/webserver.cc
index dcbfec6..41974bb 100644
--- a/be/src/util/webserver.cc
+++ b/be/src/util/webserver.cc
@@ -219,8 +219,14 @@ bool Webserver::IsSecure() const {
 }
 
 string Webserver::Url() {
+  string hostname = http_address_.hostname;
+  if (IsWildcardAddress(http_address_.hostname)) {
+    if (!GetHostname(&hostname).ok()) {
+      hostname = http_address_.hostname;
+    }
+  }
   return Substitute("$0://$1:$2", IsSecure() ? "https" : "http",
-      http_address_.hostname, http_address_.port);
+      hostname, http_address_.port);
 }
 
 Status Webserver::Start() {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 789641a..25dfddf 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -251,9 +251,6 @@ struct TPingImpalaServiceResp {
 
   // The Impalad's webserver address.
   2: string webserver_address
-
-  // The Impalad's server time.
-  3: i64 epoch_time
 }
 
 // Parameters for a ResetTable request which will invalidate a table's metadata.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/shell/impala_client.py
----------------------------------------------------------------------
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 7782aa2..bc20b09 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -235,7 +235,7 @@ class ImpalaClient(object):
     self.imp_service = ImpalaService.Client(protocol)
     result = self.ping_impala_service()
     self.connected = True
-    return result.version
+    return result
 
   def ping_impala_service(self):
     return self.imp_service.PingImpalaService()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index b9dc60a..74d7ecb 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -91,6 +91,7 @@ class ImpalaShell(cmd.Cmd):
   # If not connected to an impalad, the server version is unknown.
   UNKNOWN_SERVER_VERSION = "Not Connected"
   DISCONNECTED_PROMPT = "[Not connected] > "
+  UNKNOWN_WEBSERVER = "0.0.0.0"
   # Message to display in shell when cancelling a query
   CANCELLATION_MESSAGE = ' Cancelling Query'
   # Number of times to attempt cancellation before giving up.
@@ -134,6 +135,7 @@ class ImpalaShell(cmd.Cmd):
     self.verbose = options.verbose
     self.prompt = ImpalaShell.DISCONNECTED_PROMPT
     self.server_version = ImpalaShell.UNKNOWN_SERVER_VERSION
+    self.webserver_address = ImpalaShell.UNKNOWN_WEBSERVER
 
     self.refresh_after_connect = options.refresh_after_connect
     self.current_db = options.default_db
@@ -680,7 +682,9 @@ class ImpalaShell(cmd.Cmd):
 
   def _connect(self):
     try:
-      self.server_version = self.imp_client.connect()
+      result = self.imp_client.connect()
+      self.server_version = result.version
+      self.webserver_address = result.webserver_address
     except TApplicationException:
       # We get a TApplicationException if the transport is valid,
       # but the RPC does not exist.
@@ -751,9 +755,11 @@ class ImpalaShell(cmd.Cmd):
     return self._execute_stmt(query)
 
   def do_create(self, args):
+    # We want to print the webserver link only for CTAS queries.
+    print_web_link = "select" in args
     query = self.imp_client.create_beeswax_query("create %s" % args,
                                                  self.set_query_options)
-    return self._execute_stmt(query)
+    return self._execute_stmt(query, print_web_link=print_web_link)
 
   def do_drop(self, args):
     query = self.imp_client.create_beeswax_query("drop %s" % args,
@@ -780,7 +786,7 @@ class ImpalaShell(cmd.Cmd):
     """Executes a SELECT... query, fetching all rows"""
     query = self.imp_client.create_beeswax_query("select %s" % args,
                                                  self.set_query_options)
-    return self._execute_stmt(query)
+    return self._execute_stmt(query, print_web_link=True)
 
   def do_compute(self, args):
     """Executes a COMPUTE STATS query.
@@ -850,7 +856,7 @@ class ImpalaShell(cmd.Cmd):
                                              "#Rows", "Est. #Rows", "Peak Mem",
                                              "Est. Peak Mem", "Detail"])
 
-  def _execute_stmt(self, query, is_insert=False):
+  def _execute_stmt(self, query, is_insert=False, print_web_link=False):
     """ The logic of executing any query statement
 
     The client executes the query and the query_handle is returned immediately,
@@ -862,26 +868,24 @@ class ImpalaShell(cmd.Cmd):
     The execution time is printed and the query is closed if it hasn't been already
     """
 
-    self._print_if_verbose("Query: %s" % (query.query,))
+    self._print_if_verbose("Query: %s" % query.query)
     # TODO: Clean up this try block and refactor it (IMPALA-3814)
     try:
-      # Get the hostname, webserver port and the current coordinator time.
-      coordinator = self.imp_client.ping_impala_service()
-      # If the coordinator is on a different time zone, the epoch time returned will be
-      # different than from this system, so time.localtime(coordinator.epoch_time) will
-      # return the timestamp of the server.
-      self._print_if_verbose("Query submitted at: %s (Coordinator: %s)" % (time.strftime(
-          "%Y-%m-%d %H:%M:%S", time.localtime(coordinator.epoch_time)),
-          coordinator.webserver_address))
+      if self.webserver_address == ImpalaShell.UNKNOWN_WEBSERVER:
+        print_web_link = False
+      if print_web_link:
+        self._print_if_verbose("Query submitted at: %s (Coordinator: %s)" %
+            (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
+            self.webserver_address))
 
       start_time = time.time()
       self.last_query_handle = self.imp_client.execute_query(query)
       self.query_handle_closed = False
       self.last_summary = time.time()
-      if coordinator.webserver_address:
+      if print_web_link:
         self._print_if_verbose(
             "Query progress can be monitored at: %s/query_plan?query_id=%s" %
-            (coordinator.webserver_address, self.last_query_handle.id))
+            (self.webserver_address, self.last_query_handle.id))
 
       wait_to_finish = self.imp_client.wait_to_finish(self.last_query_handle,
           self._periodic_wait_callback)
@@ -1020,7 +1024,7 @@ class ImpalaShell(cmd.Cmd):
     """Executes an INSERT query"""
     query = self.imp_client.create_beeswax_query("insert %s" % args,
                                                  self.set_query_options)
-    return self._execute_stmt(query, is_insert=True)
+    return self._execute_stmt(query, is_insert=True, print_web_link=True)
 
   def do_explain(self, args):
     """Explain the query execution plan"""
@@ -1103,7 +1107,7 @@ class ImpalaShell(cmd.Cmd):
 
   def default(self, args):
     query = self.imp_client.create_beeswax_query(args, self.set_query_options)
-    return self._execute_stmt(query)
+    return self._execute_stmt(query, print_web_link=True)
 
   def emptyline(self):
     """If an empty line is entered, do nothing"""

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c23bf38a/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 29c5040..31cf570 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -420,14 +420,47 @@ class TestImpalaShell(ImpalaTestSuite):
     result = run_impala_shell_cmd(args, expect_success=True)
     assert_var_substitution(result)
 
-  def test_query_start_time_message(self):
-    results = run_impala_shell_cmd('--query="select 1"')
-    assert "Query submitted at: " in results.stderr
-
-  def test_query_coordinator_link_message(self):
-    results = run_impala_shell_cmd('--query="select 1"')
-    assert "(Coordinator: " in results.stderr
-    assert "Query progress can be monitored at: " in results.stderr
+  # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of
+  # 'should_exist'
+  def _validate_shell_messages(self, result_stderr, messages, should_exist=True):
+    for msg in messages:
+      if should_exist:
+        assert msg in result_stderr
+      else:
+        assert msg not in result_stderr
+
+  def test_query_time_and_link_message(self, unique_database):
+    shell_messages = ["Query submitted at: ", "(Coordinator: ",
+        "Query progress can be monitored at: "]
+    # CREATE statements should not print query time and webserver address.
+    results = run_impala_shell_cmd('--query="create table %s.shell_msg_test (id int)"' %
+        unique_database)
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
+
+    # SELECT, INSERT and CTAS queries should print the query time message and webserver
+    # address.
+    results = run_impala_shell_cmd('--query="insert into %s.shell_msg_test values (1)"' %
+        unique_database)
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=True)
+    results = run_impala_shell_cmd('--query="select * from %s.shell_msg_test"' %
+        unique_database)
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=True)
+    results = run_impala_shell_cmd('--query="create table %s.shell_msg_ctas_test as \
+        select * from %s.shell_msg_test"' % (unique_database, unique_database))
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=True)
+
+    # DROP statements should not print query time and webserver address.
+    results = run_impala_shell_cmd('--query="drop table %s.shell_msg_test"' %
+        unique_database)
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
+    run_impala_shell_cmd('--query="drop table %s.shell_msg_ctas_test"' %
+        unique_database)
+
+    # Simple queries should not print query time and webserver address.
+    results = run_impala_shell_cmd('--query="use default"')
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
+    results = run_impala_shell_cmd('--query="show tables"')
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
 
   def test_missing_query_file(self):
     result = run_impala_shell_cmd('-f nonexistent.sql', expect_success=False)