You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/08 16:15:06 UTC

[5/5] impala git commit: IMPALA-6602: fixes flaky expiration test

IMPALA-6602: fixes flaky expiration test

The test_query_expiration test assumes that a metric and
the query state are maintained atomically. Since they're
not, flakes (false negatives) occasionally occur.

The fix in this patch is to loop until the expected state
is seen. If the expected state is not seen within a given timeout,
the test fails. These tests depend on timing, so
if this validation takes too long, the test will also fail.
Such looping is used in the two places where it's assumed that the
client state and metrics are maintained atomically.

Change-Id: I7aabed87d84d5cfd8078cc6c39df48e22ff30afc
Reviewed-on: http://gerrit.cloudera.org:8080/9538
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1f573e08
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1f573e08
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1f573e08

Branch: refs/heads/2.x
Commit: 1f573e08b8ab8ab84ad06555e6647e66970a0ab5
Parents: 1201f28
Author: Vuk Ercegovac <ve...@cloudera.com>
Authored: Wed Mar 7 13:44:47 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Mar 8 10:24:38 2018 +0000

----------------------------------------------------------------------
 tests/custom_cluster/test_query_expiration.py | 25 ++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1f573e08/tests/custom_cluster/test_query_expiration.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_query_expiration.py b/tests/custom_cluster/test_query_expiration.py
index 0e47ce2..ed2a636 100644
--- a/tests/custom_cluster/test_query_expiration.py
+++ b/tests/custom_cluster/test_query_expiration.py
@@ -99,10 +99,13 @@ class TestQueryExpiration(CustomClusterTestSuite):
     # expirations but only one should be counted.
     impalad.service.wait_for_metric_value('impala-server.num-queries-expired',
                                           num_expired + len(handles))
-    assert (client.get_state(default_timeout_expire_handle) ==
-            client.QUERY_STATES['EXCEPTION'])
-    assert (client.get_state(default_timeout_expire_handle2) ==
-            client.QUERY_STATES['EXCEPTION'])
+    # The metric and client state are not atomically maintained. Since the
+    # expiration metric has just been reached, accessing the client state
+    # is guarded in a loop to avoid flaky false negatives.
+    self.__expect_client_state(client, default_timeout_expire_handle,
+                               client.QUERY_STATES['EXCEPTION'])
+    self.__expect_client_state(client, default_timeout_expire_handle2,
+                               client.QUERY_STATES['EXCEPTION'])
 
     # Check that we didn't wait too long to be expired (double the timeout is sufficiently
     # large to avoid most noise in measurement)
@@ -125,7 +128,6 @@ class TestQueryExpiration(CustomClusterTestSuite):
         # We fetched from some cancelled handles above, which unregistered the queries.
         assert 'Invalid or unknown query handle' in str(e)
 
-
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--idle_query_timeout=0")
   def test_query_expiration_no_default(self, vector):
@@ -170,6 +172,17 @@ class TestQueryExpiration(CustomClusterTestSuite):
     except Exception, e:
       assert re.search(exception_regex, str(e))
 
+  def __expect_client_state(self, client, handle, expected_state, timeout=0.1):
+    """Try to fetch 'expected_state' from 'client' within 'timeout' seconds.
+    Fail if unable."""
+    start_time = time()
+    actual_state = None
+    while (time() - start_time < timeout):
+      actual_state = client.get_state(handle)
+      if actual_state == expected_state:
+        break
+    assert expected_state == actual_state
+
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--idle_query_timeout=1")
   def test_concurrent_query_expiration(self, vector):
@@ -250,7 +263,7 @@ class TestQueryExpiration(CustomClusterTestSuite):
     for t in non_expiring_threads:
       assert t.success
     for t in expiring_threads:
-      assert client.get_state(t.handle) == client.QUERY_STATES['EXCEPTION']
+      self.__expect_client_state(client, t.handle, client.QUERY_STATES['EXCEPTION'])
     for t in time_limit_threads:
       assert re.search(
           "Query [0-9a-f]+:[0-9a-f]+ expired due to execution time limit of 1s000ms",