You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/03/31 03:28:00 UTC
[impala] 01/04: IMPALA-10397: De-flake test_single_workload

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 45fb0fb3e7d8541ef2886cb8d5c9510955bbe242
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Mon Mar 29 18:20:35 2021 -0700

    IMPALA-10397: De-flake test_single_workload
    
    This patch removes a flaky part of the test that relies on the query
    completion rate. Since we are already verifying that the number of
    healthy executor groups increases, this additional check does not
    add much to the test.
    
    Change-Id: I6f75afdbe676d9dd6922b6ba8aa1919daa161947
    Reviewed-on: http://gerrit.cloudera.org:8080/17239
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_auto_scaling.py | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/tests/custom_cluster/test_auto_scaling.py b/tests/custom_cluster/test_auto_scaling.py
index c2863ca..7547d98 100644
--- a/tests/custom_cluster/test_auto_scaling.py
+++ b/tests/custom_cluster/test_auto_scaling.py
@@ -69,9 +69,9 @@ class TestAutoScaling(CustomClusterTestSuite):
   @SkipIfEC.fix_later
   def test_single_workload(self):
     """This test exercises the auto-scaling logic in the admission controller. It spins up
-    a base cluster (coordinator, catalog, statestore), runs some queries to observe that
-    new executors are started, then stops the workload and observes that the cluster gets
-    shutdown."""
+    a base cluster (coordinator, catalog, statestore), runs a workload to initiate a
+    scaling up event as the queries start queuing, then stops the workload and observes
+    that the cluster gets shutdown."""
     GROUP_SIZE = 2
     EXECUTOR_SLOTS = 3
     auto_scaler = AutoScaler(executor_slots=EXECUTOR_SLOTS, group_size=GROUP_SIZE)
@@ -96,7 +96,6 @@ class TestAutoScaling(CustomClusterTestSuite):
       assert any(self._get_total_admitted_queries() >= 10 or sleep(1)
                  for _ in range(self.STATE_CHANGE_TIMEOUT_S)), \
           "Did not admit enough queries within %s s" % self.STATE_CHANGE_TIMEOUT_S
-      single_group_query_rate = workload.get_query_rate()
       # Wait for second executor group to start
       cluster_size = (2 * GROUP_SIZE) + 1
       assert any(self._get_num_backends() >= cluster_size or sleep(1)
@@ -106,25 +105,6 @@ class TestAutoScaling(CustomClusterTestSuite):
       assert self.impalad_test_service.get_metric_value(
         "cluster-membership.executor-groups.total-healthy") >= 2
 
-      # Wait for query rate to exceed the maximum for a single executor group. In the past
-      # we tried to wait for it to pass a higher threshold but on some platforms we saw
-      # that it was too flaky.
-      max_query_rate = 0
-      # This barrier has been flaky in the past so we wait 2x as long as for the other
-      # checks.
-      end = time() + 2 * self.STATE_CHANGE_TIMEOUT_S
-      while time() < end:
-        current_rate = workload.get_query_rate()
-        LOG.info("Current rate: %s" % current_rate)
-        max_query_rate = max(max_query_rate, current_rate)
-        if max_query_rate > single_group_query_rate:
-          break
-        sleep(1)
-
-      assert max_query_rate > single_group_query_rate, "Query rate did not exceed %s " \
-          "within %s s. Maximum was %s. Cluster size is %s." % (single_group_query_rate,
-          self.STATE_CHANGE_TIMEOUT_S, max_query_rate, cluster_size)
-
       LOG.info("Stopping workload")
       workload.stop()