You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/03/23 22:03:50 UTC

[impala] 03/03: IMPALA-6360: Don't show full query statement on Impala WebUI by default

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3b833902519fb8f0ef9b5fd20919c5fd85d22fcf
Author: Adam Tamas <ta...@cloudera.com>
AuthorDate: Tue Feb 25 13:43:56 2020 +0100

    IMPALA-6360: Don't show full query statement on Impala WebUI by default
    
    In the WebUI's query list the query statements are trimmed, but the full
    query statement can be seen on the details page.
    The default statement length is 250 chars and it can be adjusted by the
    query_stmt_size flag that can be set when the cluster starts.
    Example:
    bin/start-impala-cluster.py -s1 --impalad_args --query_stmt_size=10
    
    Testing:
    -manual testing in the WebUI.
    -added 'test_query_stmt()' to test_web_pages.py
    -added tests to the custom cluster webserver tests to check the output
    without truncation and with a custom truncation length
    
    Change-Id: Ib7109a0be5d1022b4f8d6e72441cf5dc1dc42605
    Reviewed-on: http://gerrit.cloudera.org:8080/15288
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/impala-http-handler.cc  |  7 ++++++-
 be/src/service/impala-server.cc        |  2 ++
 tests/custom_cluster/test_web_pages.py | 34 ++++++++++++++++++++++++++++++++++
 tests/webserver/test_web_pages.py      | 18 ++++++++++++++++++
 www/queries.tmpl                       |  2 ++
 5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 7efcbb7..3a5802b 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -59,6 +59,7 @@ using namespace rapidjson;
 using namespace strings;
 
 DECLARE_int32(query_log_size);
+DECLARE_int32(query_stmt_size);
 DECLARE_bool(use_local_catalog);
 
 namespace {
@@ -368,7 +369,11 @@ void ImpalaHttpHandler::QueryStateToJson(const ImpalaServer::QueryStateRecord& r
   value->AddMember("default_db", default_db, document->GetAllocator());
 
   // Redact the query string
-  Value stmt(RedactCopy(record.stmt).c_str(), document->GetAllocator());
+  std::string tmp_stmt = RedactCopy(record.stmt);
+  if(FLAGS_query_stmt_size && tmp_stmt.length() > FLAGS_query_stmt_size) {
+    tmp_stmt = tmp_stmt.substr(0, FLAGS_query_stmt_size).append("...");
+  }
+  Value stmt(tmp_stmt.c_str(), document->GetAllocator());
   value->AddMember("stmt", stmt, document->GetAllocator());
 
   Value stmt_type(_TStmtType_VALUES_TO_NAMES.find(record.stmt_type)->second,
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 53d6aea..fb931b7 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -150,6 +150,8 @@ DEFINE_string(default_query_options, "", "key=value pair of default query option
     " impalad, separated by ','");
 DEFINE_int32(query_log_size, 25, "Number of queries to retain in the query log. If -1, "
     "the query log has unbounded size.");
+DEFINE_int32(query_stmt_size, 250, "length of the statements in the query log. If <=0, "
+    "the full statement is displayed in the query log without trimming.");
 DEFINE_bool(log_query_to_file, true, "if true, logs completed query profiles to file.");
 
 DEFINE_int64(max_result_cache_size, 100000L, "Maximum number of query results a client "
diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py
index 6a83961..d2eeffb 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -104,3 +104,37 @@ class TestWebPage(CustomClusterTestSuite):
         assert response.status_code == requests.codes.ok, ip
       except requests.exceptions.ConnectionError:
         assert ip != interface
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+      impalad_args="--query_stmt_size=0"
+  )
+  def test_query_stmt_without_truncate(self):
+    """Check if the full query string is displayed in the query list on the WebUI."""
+    # The input query is a select + 450 'x ' long.
+    query_select = "x " * 450
+    query = 'select "{0}"'.format(query_select)
+    # In the site there is an extra \ before the " so we need that in the expected
+    # response too.
+    expected = 'select \\"{0}\\"'.format(query_select)
+    self.execute_query(query)
+    response = requests.get("http://localhost:25000/queries?json")
+    response_json = response.text
+    assert expected in response_json, "No matching statement found in the queries site."
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+      impalad_args="--query_stmt_size=10"
+  )
+  def test_query_stmt_with_custom_length(self):
+    """Check if the partial query with the correct length is displayed in the query list
+    on the WebUI."""
+    # The input query is a select + 450 'x ' long.
+    query = 'select "{0}"'.format("x " * 450)
+    # Searching for the custom, 10 chars long response. In the site there is an extra \
+    # before the " so we need that in the expected response too.
+    expected = 'select \\"x ...'
+    self.execute_query(query)
+    response = requests.get("http://localhost:25000/queries?json")
+    response_json = response.text
+    assert expected in response_json, "No matching statement found in the queries site."
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 07eaf9b..6fa6cea 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -50,6 +50,7 @@ class TestWebPage(ImpalaTestSuite):
   RESET_RESOURCE_POOL_STATS_URL = "http://localhost:{0}/resource_pool_reset"
   BACKENDS_URL = "http://localhost:{0}/backends"
   PROMETHEUS_METRICS_URL = "http://localhost:{0}/metrics_prometheus"
+  QUERIES_URL = "http://localhost:{0}/queries"
 
   # log4j changes do not apply to the statestore since it doesn't
   # have an embedded JVM. So we make two sets of ports to test the
@@ -416,6 +417,23 @@ class TestWebPage(ImpalaTestSuite):
       if tblinfo["name"] == tbl_fname:
         assert tblinfo["num_files"] == int(numfiles)
 
+  def test_query_stmt(self):
+    """Create a long select query then check if it is truncated in the response json."""
+    # The input query is a select + 450 "x " long, which is long enough to get truncated.
+    query = "select \"{0}\"".format("x " * 450)
+    # The expected result query should be 253 long and contains the first 250
+    # chars + "..."
+    expected_result = "select \"{0}...".format("x " * 121)
+    check_if_contains = False
+    self.execute_query(query)
+    response_json = self.__run_query_and_get_debug_page(query, self.QUERIES_URL)
+    # Search the json for the expected value.
+    for json_part in response_json['completed_queries']:
+      if expected_result in json_part['stmt']:
+        check_if_contains = True
+        break
+    assert check_if_contains, "No matching statement found in the jsons."
+
   def __run_query_and_get_debug_page(self, query, page_url, query_options=None,
                                      expected_state=None):
     """Runs a query to obtain the content of the debug page pointed to by page_url, then
diff --git a/www/queries.tmpl b/www/queries.tmpl
index 5e0fa8e..893b424 100644
--- a/www/queries.tmpl
+++ b/www/queries.tmpl
@@ -21,6 +21,8 @@ under the License.
 <p class="lead">This page lists all running queries, plus any completed queries that are
 archived in memory. The size of that archive is controlled with the
 <samp>--query_log_size</samp> command line parameter.</p>
+<p>The length of the statements are controlled with the <samp>--query_stmt_size</samp>
+command line parameter.</p>
 
 <h3>{{num_executing_queries}} queries in flight</h3>