You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2018/07/03 16:18:25 UTC

[2/4] impala git commit: IMPALA-6352: Dump backtrace on failure of TestTableSample

IMPALA-6352: Dump backtrace on failure of TestTableSample

TestTableSample is a flaky test that has been failing very rarely,
possibly due to a hung thread. This patch therefore adds a timeout to
the test and, if the timeout expires, logs the backtraces of all threads
in every impalad so that we can get more information on the state of
those threads.

Change-Id: I73fcdd30863cee105584c947bb0c48cf872809c1
Reviewed-on: http://gerrit.cloudera.org:8080/10851
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f3b1c4bc
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f3b1c4bc
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f3b1c4bc

Branch: refs/heads/master
Commit: f3b1c4bc65031899ca9d7e41ab6c7da79b18777d
Parents: f20ecad
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Jul 2 14:27:09 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Jul 3 02:48:29 2018 +0000

----------------------------------------------------------------------
 tests/beeswax/impala_beeswax.py      | 26 +++++++++++++++++++++++---
 tests/common/impala_connection.py    |  5 +++++
 tests/query_test/test_tablesample.py | 23 +++++++++++++++++++++--
 3 files changed, 49 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f3b1c4bc/tests/beeswax/impala_beeswax.py
----------------------------------------------------------------------
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index e21f896..9489ed4 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -342,7 +342,7 @@ class ImpalaBeeswaxClient(object):
     """Executes a query and waits for completion"""
     handle = self.execute_query_async(query_string, user=user)
     # Wait for the query to finish execution.
-    self.wait_for_completion(handle)
+    self.wait_for_finished(handle)
     return handle
 
   def cancel_query(self, query_id):
@@ -351,8 +351,9 @@ class ImpalaBeeswaxClient(object):
   def close_query(self, handle):
     self.__do_rpc(lambda: self.imp_service.close(handle))
 
-  def wait_for_completion(self, query_handle):
-    """Given a query handle, polls the coordinator waiting for the query to complete"""
+  def wait_for_finished(self, query_handle):
+    """Given a query handle, polls the coordinator waiting for the query to transition to
+       'FINISHED' state"""
     while True:
       query_state = self.get_state(query_handle)
       # if the rpc succeeded, the output is the query state
@@ -367,6 +368,25 @@ class ImpalaBeeswaxClient(object):
           self.close_query(query_handle)
       time.sleep(0.05)
 
+  def wait_for_finished_timeout(self, query_handle, timeout=10):
+    """Given a query handle and a timeout, polls the coordinator waiting for the query to
+       transition to 'FINISHED' state till 'timeout' seconds"""
+    start_time = time.time()
+    while (time.time() - start_time < timeout):
+      query_state = self.get_state(query_handle)
+      # if the rpc succeeded, the output is the query state
+      if query_state == self.query_states["FINISHED"]:
+        return True
+      elif query_state == self.query_states["EXCEPTION"]:
+        try:
+          error_log = self.__do_rpc(
+            lambda: self.imp_service.get_log(query_handle.log_context))
+          raise ImpalaBeeswaxException("Query aborted:" + error_log, None)
+        finally:
+          self.close_query(query_handle)
+      time.sleep(0.05)
+    return False
+
   def wait_for_admission_control(self, query_handle):
     """Given a query handle, polls the coordinator waiting for it to complete
       admission control processing of the query"""

http://git-wip-us.apache.org/repos/asf/impala/blob/f3b1c4bc/tests/common/impala_connection.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index 84495dc..b075506 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -179,6 +179,11 @@ class BeeswaxConnection(ImpalaConnection):
     LOG.info("-- getting runtime profile operation: %s" % operation_handle)
     return self.__beeswax_client.get_runtime_profile(operation_handle.get_handle())
 
+  def wait_for_finished_timeout(self, operation_handle, timeout):
+    LOG.info("-- waiting for query to reach FINISHED state: %s" % operation_handle)
+    return self.__beeswax_client.wait_for_finished_timeout(
+      operation_handle.get_handle(), timeout)
+
   def wait_for_admission_control(self, operation_handle):
     LOG.info("-- waiting for completion of the admission control processing of the "
         "query: %s" % operation_handle)

http://git-wip-us.apache.org/repos/asf/impala/blob/f3b1c4bc/tests/query_test/test_tablesample.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_tablesample.py b/tests/query_test/test_tablesample.py
index f3eaaaa..4bc7e1f 100644
--- a/tests/query_test/test_tablesample.py
+++ b/tests/query_test/test_tablesample.py
@@ -18,6 +18,7 @@
 # Tests the TABLESAMPLE clause.
 
 import pytest
+import subprocess
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_vector import ImpalaTestDimension
@@ -54,8 +55,26 @@ class TestTableSample(ImpalaTestSuite):
     for perc in [5, 20, 50]:
       rep_sql = ""
       if repeatable: rep_sql = " repeatable(1)"
-      result = self.client.execute(
-        "select count(*) from alltypes tablesample system(%s)%s" % (perc, rep_sql))
+      sql_stmt = "select count(*) from alltypes tablesample system(%s)%s" \
+                 % (perc, rep_sql)
+      handle = self.client.execute_async(sql_stmt)
+      # IMPALA-6352: flaky test, possibly due to a hung thread. Wait for 500 sec before
+      # failing and logging the backtraces of all impalads.
+      is_finished = self.client.wait_for_finished_timeout(handle, 500)
+      assert is_finished, 'Query Timed out. Dumping backtrace of all threads in ' \
+                          'impalads:\nthreads in the impalad1: %s \nthreads in the ' \
+                          'impalad2: %s \nthreads in the impalad3: %s' % \
+                        (subprocess.check_output(
+                          "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\"  "
+                          "--batch -p $(pgrep impalad | sed -n 1p)", shell=True),
+                         subprocess.check_output(
+                          "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\"  "
+                          "--batch -p $(pgrep impalad | sed -n 2p)", shell=True),
+                         subprocess.check_output(
+                          "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\"  "
+                          "--batch -p $(pgrep impalad | sed -n 3p)", shell=True))
+      result = self.client.fetch(sql_stmt, handle)
+      self.client.close_query(handle)
       count = int(result.data[0])
       assert count < baseline_count
       if prev_count and repeatable: