You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/04/02 17:53:01 UTC
[impala] 01/02: Fix stress test tracking of past metrics values

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 26c700c947e418a6d75f248664e8e6f085d10478
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Wed Mar 27 19:28:42 2019 +0000

    Fix stress test tracking of past metrics values
    
    The stress test keeps track of various metrics about queries that it
    has already run, such as the number of completed queries and the
    number of errors. The stress test also forks off several processes to
    handle running of queries.
    
    The intention is that the metrics should be shared across all of the
    processes. However, a recent patch (IMPALA-6662) changed these metrics
    to be stored in a dict, which inadvertently caused them to no longer be
    shared. This patch solves the problem by storing the values in the dict
    as 'Value' objects, which are shared across processes.
    
    Synchronization of access to these values is handled by taking
    '_query_runners_lock'.
    
    Testing:
    - Ran the stress test for 1000 queries on tpch 500 on a 10 node
      cluster. Previously, this would reach a point where there were 0
      'Active' queries and hang.
    
    Change-Id: I03b5e24b41fff1d33f9bb64b14a6d0b7a23a56e2
    Reviewed-on: http://gerrit.cloudera.org:8080/12903
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/stress/concurrent_select.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index f373f96..50fed55 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -264,7 +264,7 @@ class StressRunner(object):
     # These are the cumulative values of all the queries that have started/finished/-
     # dequeued, etc. on runners that have already died. Every time we notice that a query
     # runner has died, we update these values.
-    self._past_runner_metrics = defaultdict(lambda: 0)
+    self._past_runner_metrics = defaultdict(lambda: Value("i", 0))
 
     self._query_consumer_thread = None
     self._mem_polling_thread = None
@@ -275,12 +275,14 @@ class StressRunner(object):
         MUST hold '_query_runners_lock' before calling.
     """
     for key, val in query_runner.get_metric_vals():
-      self._past_runner_metrics[key] += val
+      self._past_runner_metrics[key].value += val
 
   def _calc_total_runner_metrics(self):
     """ Calculate the total of metrics across past and active query runners. """
-    totals = copy(self._past_runner_metrics)
+    totals = defaultdict(lambda: 0)
     with self._query_runners_lock:
+      for key in self._past_runner_metrics:
+        totals[key] = self._past_runner_metrics[key].value
       for query_runner in self._query_runners:
         for key, val in query_runner.get_metric_vals():
           totals[key] += val
@@ -295,7 +297,7 @@ class StressRunner(object):
     """ TODO: Get rid of this function after reformatting how we obtain query indices.
         _query_runners_lock MUST be taken before calling this function.
     """
-    total = self._past_runner_metrics[key]
+    total = self._past_runner_metrics[key].value
     for runner in self._query_runners:
       total += runner.get_metric_val(key)
     return total