You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/10/14 22:48:34 UTC

[impala] 03/06: IMPALA-10224: Add startup flag not to expose debug web url to clients

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3d067572dd401a08045de57fefa3d3131bea3c2b
Author: Attila Jeges <at...@cloudera.com>
AuthorDate: Tue Oct 6 17:52:58 2020 +0200

    IMPALA-10224: Add startup flag not to expose debug web url to clients
    
    This patch introduces a new startup flag
    --ping_expose_webserver_url (true by default) to control whether
    PingImpalaService, PingImpalaHS2Service RPC calls should expose
    the debug web url to the client or not.
    
    This is necessary as the debug web UI is not something that
    end-users will necessarily have access to.
    
    If the flag is set to false, the RPC calls will return an empty
    string instead of the real url signalling that the debug web ui
    is not available.
    
    Note that if the webserver is disabled (--enable_webserver flag
    is set to false) the RPC calls will behave the same and return an
    empty string for the url.
    
    Change-Id: I7ec3e92764d712b8fee63c1f45b038c31c184cfc
    Reviewed-on: http://gerrit.cloudera.org:8080/16573
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/exec-env.cc              |  3 +++
 be/src/runtime/exec-env.h               |  2 +-
 be/src/service/impala-beeswax-server.cc |  8 +++++++-
 be/src/service/impala-hs2-server.cc     |  7 ++++++-
 shell/impala_client.py                  | 13 +++++++------
 shell/impala_shell.py                   | 13 +++++++------
 tests/custom_cluster/test_web_pages.py  | 31 +++++++++++++++++++++++++++++++
 7 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/be/src/runtime/exec-env.cc b/be/src/runtime/exec-env.cc
index 3645504..689ec8f 100644
--- a/be/src/runtime/exec-env.cc
+++ b/be/src/runtime/exec-env.cc
@@ -81,6 +81,9 @@ using namespace strings;
 DEFINE_string(catalog_service_host, "localhost",
     "hostname where CatalogService is running");
 DEFINE_bool(enable_webserver, true, "If true, debug webserver is enabled");
+DEFINE_bool(ping_expose_webserver_url, true,
+    "If true, debug webserver url is exposed via PingImpalaService/PingImpalaHS2Service "
+    "RPC calls");
 DEFINE_string(state_store_host, "localhost",
     "hostname where StatestoreService is running");
 DEFINE_int32(state_store_subscriber_port, 23000,
diff --git a/be/src/runtime/exec-env.h b/be/src/runtime/exec-env.h
index 0d40ac0..e96a070 100644
--- a/be/src/runtime/exec-env.h
+++ b/be/src/runtime/exec-env.h
@@ -137,7 +137,7 @@ class ExecEnv {
   BufferPool* buffer_pool() { return buffer_pool_.get(); }
   SystemStateInfo* system_state_info() { return system_state_info_.get(); }
 
-  void set_enable_webserver(bool enable) { enable_webserver_ = enable; }
+  bool get_enable_webserver() const { return enable_webserver_; }
 
   ClusterMembershipMgr* cluster_membership_mgr() { return cluster_membership_mgr_.get(); }
   Scheduler* scheduler() { return scheduler_.get(); }
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index def746c..4d14903 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -49,6 +49,8 @@ using namespace beeswax;
     }                                                           \
   } while (false)
 
+DECLARE_bool(ping_expose_webserver_url);
+
 namespace impala {
 
 void ImpalaServer::query(beeswax::QueryHandle& beeswax_handle, const Query& query) {
@@ -473,7 +475,11 @@ void ImpalaServer::PingImpalaService(TPingImpalaServiceResp& return_val) {
 
   VLOG_RPC << "PingImpalaService()";
   return_val.version = GetVersionString(true);
-  return_val.webserver_address = ExecEnv::GetInstance()->webserver()->url();
+  if (ExecEnv::GetInstance()->get_enable_webserver() && FLAGS_ping_expose_webserver_url) {
+    return_val.webserver_address = ExecEnv::GetInstance()->webserver()->url();
+  } else {
+    return_val.webserver_address = "";
+  }
   VLOG_RPC << "PingImpalaService(): return_val=" << ThriftDebugString(return_val);
 }
 
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index 9ec92cf..43f01a1 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -95,6 +95,7 @@ DECLARE_string(hostname);
 DECLARE_int32(webserver_port);
 DECLARE_int32(idle_session_timeout);
 DECLARE_int32(disconnected_session_timeout);
+DECLARE_bool(ping_expose_webserver_url);
 
 namespace impala {
 
@@ -1200,7 +1201,11 @@ void ImpalaServer::PingImpalaHS2Service(TPingImpalaHS2ServiceResp& return_val,
   }
 
   return_val.__set_version(GetVersionString(true));
-  return_val.__set_webserver_address(ExecEnv::GetInstance()->webserver()->url());
+  if (ExecEnv::GetInstance()->get_enable_webserver() && FLAGS_ping_expose_webserver_url) {
+    return_val.__set_webserver_address(ExecEnv::GetInstance()->webserver()->url());
+  } else {
+    return_val.__set_webserver_address("");
+  }
   VLOG_RPC << "PingImpalaHS2Service(): return_val=" << ThriftDebugString(return_val);
 }
 }
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 8ad86a4..684ef68 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -334,12 +334,13 @@ class ImpalaClient(object):
 
   def _append_retried_query_link(self, get_log_result):
     """Append the retried query link if the original query has been retried"""
-    query_id_search = re.search("Query has been retried using query id: (.*)\n",
-                                get_log_result)
-    if query_id_search and len(query_id_search.groups()) >= 1:
-      retried_query_id = query_id_search.group(1)
-      get_log_result += "Retried query link: %s" % \
-                        self.get_query_link(retried_query_id)
+    if self.webserver_address:
+      query_id_search = re.search("Query has been retried using query id: (.*)\n",
+                                  get_log_result)
+      if query_id_search and len(query_id_search.groups()) >= 1:
+        retried_query_id = query_id_search.group(1)
+        get_log_result += "Retried query link: %s" % \
+                          self.get_query_link(retried_query_id)
     return get_log_result
 
   def _get_http_transport(self, connect_timeout_ms):
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 996f3b3..3d1d810 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1133,12 +1133,13 @@ class ImpalaShell(cmd.Cmd, object):
       if summary.error_logs:
         for error_line in summary.error_logs:
           data += error_line + "\n"
-          query_id_search = re.search("Retrying query using query id: (.*)",
-                                      error_line)
-          if query_id_search and len(query_id_search.groups()) == 1:
-            retried_query_id = query_id_search.group(1)
-            data += "Retried query link: %s\n"\
-                    % self.imp_client.get_query_link(retried_query_id)
+          if self.webserver_address:
+            query_id_search = re.search("Retrying query using query id: (.*)",
+                                        error_line)
+            if query_id_search and len(query_id_search.groups()) == 1:
+              retried_query_id = query_id_search.group(1)
+              data += "Retried query link: %s\n"\
+                      % self.imp_client.get_query_link(retried_query_id)
 
       if summary.progress:
         progress = summary.progress
diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py
index d2eeffb..7913398 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -23,6 +23,7 @@ import psutil
 import pytest
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.shell.util import run_impala_shell_cmd
 
 
 class TestWebPage(CustomClusterTestSuite):
@@ -138,3 +139,33 @@ class TestWebPage(CustomClusterTestSuite):
     response = requests.get("http://localhost:25000/queries?json")
     response_json = response.text
     assert expected in response_json, "No matching statement found in the queries site."
+
+  # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of
+  # 'should_exist'
+  def _validate_shell_messages(self, result_stderr, messages, should_exist=True):
+    for msg in messages:
+      if should_exist:
+        assert msg in result_stderr, result_stderr
+      else:
+        assert msg not in result_stderr, result_stderr
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+      impalad_args="--ping_expose_webserver_url=false"
+  )
+  def test_webserver_url_not_exposed(self, vector):
+    if vector.get_value('table_format').file_format != 'text':
+      pytest.skip('runs only for text table_format')
+    # If webserver url is not exposed, debug web urls shouldn't be printed out.
+    shell_messages = ["Query submitted at: ", "(Coordinator: ",
+        "Query progress can be monitored at: "]
+    query_shell_arg = '--query=select * from functional.alltypes'
+    # hs2
+    results = run_impala_shell_cmd(vector, [query_shell_arg])
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
+    # beeswax
+    results = run_impala_shell_cmd(vector, ['--protocol=beeswax', query_shell_arg])
+    self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
+    # Even though webserver url is not exposed, it is still accessible.
+    page = requests.get('http://localhost:25000')
+    assert page.status_code == requests.codes.ok