You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/12/22 15:47:06 UTC

[3/5] impala git commit: IMPALA-6225: Part 2: Query profile date-time strings should have ns precision.

IMPALA-6225: Part 2: Query profile date-time strings should have ns
precision.

This commit follows 16d8dd58.

This patch adds a test case that inspects the thrift profile of a
completed query, and verifies that the "Start Time" and
"End Time" of the query have nanosecond precision. We chose to
work with the thrift profile directly, rather than parse the debug
web page, as it is the thrift profile which is consumed by
management API clients of Impala.

Change-Id: Id3421a34cc029ebca551730084c7cbd402d5c109
Reviewed-on: http://gerrit.cloudera.org:8080/8784
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b581a9d1
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b581a9d1
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b581a9d1

Branch: refs/heads/master
Commit: b581a9d1ee277674671431dbd6880806d6120567
Parents: ac9ca56
Author: Zoram Thanga <zo...@cloudera.com>
Authored: Tue Dec 5 15:30:49 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Dec 21 04:26:33 2017 +0000

----------------------------------------------------------------------
 tests/common/impala_service.py         | 32 +++++++++++++++++++-
 tests/query_test/test_observability.py | 47 +++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b581a9d1/tests/common/impala_service.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index be33328..dfd4a6e 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -29,7 +29,11 @@ from tests.common.impala_connection import create_connection, create_ldap_connec
 from TCLIService import TCLIService
 from thrift.transport.TSocket import TSocket
 from thrift.transport.TTransport import TBufferedTransport
-from thrift.protocol import TBinaryProtocol
+from thrift.protocol import TBinaryProtocol, TCompactProtocol, TProtocol
+from thrift.TSerialization import deserialize
+from RuntimeProfile.ttypes import TRuntimeProfileTree
+import base64
+import zlib
 
 logging.basicConfig(level=logging.ERROR, format='%(threadName)s: %(message)s')
 LOG = logging.getLogger('impala_service')
@@ -57,6 +61,32 @@ class BaseImpalaService(object):
   def read_debug_webpage(self, page_name, timeout=10, interval=1):
     return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).read()
 
+  def get_thrift_profile(self, query_id, timeout=10, interval=1):
+    """Returns thrift profile of the specified query ID, if available"""
+    page_name = "query_profile_encoded?query_id=%s" % (query_id)
+    try:
+      response = self.open_debug_webpage(page_name, timeout=timeout, interval=interval)
+      tbuf = response.read()
+    except Exception as e:
+      LOG.info("Thrift profile for query %s not yet available: %s", query_id, str(e))
+      return None
+    else:
+      tree = TRuntimeProfileTree()
+      try:
+        deserialize(tree, zlib.decompress(base64.b64decode(tbuf)),
+                  protocol_factory=TCompactProtocol.TCompactProtocolFactory())
+        tree.validate()
+        return tree
+      except Exception as e:
+        LOG.info("Exception while deserializing query profile of %s: %s", query_id,
+                str(e));
+        # We should assert that the response code is not 200 once
+        # IMPALA-6332: Impala webserver should return HTTP error code for missing query
+        # profiles, is fixed.
+        if str(e) == 'Incorrect padding':
+          assert "Could not obtain runtime profile" in tbuf, tbuf
+        return None
+
   def get_debug_webpage_json(self, page_name):
     """Returns the json for the given Impala debug webpage, eg. '/queries'"""
     return json.loads(self.read_debug_webpage(page_name + "?json"))

http://git-wip-us.apache.org/repos/asf/impala/blob/b581a9d1/tests/query_test/test_observability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index a7c3d29..1396ac5 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -17,6 +17,9 @@
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal
+from tests.common.impala_cluster import ImpalaCluster
+import logging
+import time
 
 class TestObservability(ImpalaTestSuite):
   @classmethod
@@ -128,3 +131,47 @@ class TestObservability(ImpalaTestSuite):
     assert results.runtime_profile.count("HASH_JOIN_NODE") == 2
     assert results.runtime_profile.count("AGGREGATION_NODE") == 2
     assert results.runtime_profile.count("PLAN_ROOT_SINK") == 2
+
+  def test_query_profile_thrift_timestamps(self):
+    """Test that the query profile start and end time date-time strings have
+    nanosecond precision. Nanosecond precision is expected by management API clients
+    that consume Impala debug webpages."""
+    query = "select sleep(1000)"
+    handle = self.client.execute_async(query)
+    query_id = handle.get_handle().id
+    results = self.client.fetch(query, handle)
+    self.client.close()
+
+    start_time_sub_sec_str = ""
+    end_time_sub_sec_str = ""
+    start_time = ""
+    end_time = ""
+
+    MAX_RETRIES = 60
+    for retries in xrange(MAX_RETRIES):
+      tree = self.impalad_test_service.get_thrift_profile(query_id)
+
+      if tree is None:
+        continue
+      # tree.nodes[1] corresponds to ClientRequestState::summary_profile_
+      # See be/src/service/client-request-state.[h|cc].
+      start_time = tree.nodes[1].info_strings["Start Time"]
+      end_time = tree.nodes[1].info_strings["End Time"]
+      # Start and End Times are of the form "2017-12-07 22:26:52.167711000"
+      start_time_sub_sec_str = start_time.split('.')[-1]
+      end_time_sub_sec_str = end_time.split('.')[-1]
+      if len(end_time_sub_sec_str) == 0:
+        logging.info('end_time_sub_sec_str hasn\'t shown up yet, retries=%d', retries)
+        time.sleep(1)
+        continue
+      assert len(end_time_sub_sec_str) == 9, end_time
+      assert len(start_time_sub_sec_str) == 9, start_time
+      return True
+
+    # If we're here, we didn't get the final thrift profile from the debug web page.
+    # This could happen due to heavy system load. The test is then inconclusive.
+    # Log a message and fail this run.
+    dbg_str = 'Debug thrift profile for query ' + str(query_id) + ' not available in '
+    dbg_str += str(MAX_RETRIES) + ' seconds, '
+    dbg_str += '(' + start_time + ', ' + end_time + ').'
+    assert False, dgb_str