You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/04/02 17:53:01 UTC
[impala] 01/02: Fix stress test tracking of past metrics values
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 26c700c947e418a6d75f248664e8e6f085d10478
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Wed Mar 27 19:28:42 2019 +0000
Fix stress test tracking of past metrics values
The stress test keeps track of various metrics about queries that it
has already run, such as the number of completed queries and the
number of errors. The stress test also forks off several processes to
handle running of queries.
The intention is that the metrics should be shared across all of the
processes. However, a recent patch (IMPALA-6662) changed these metrics
to be stored in a dict and in the process caused them to no longer be
shared. This patch solves the problem by storing the values in the dict
as multiprocessing 'Value' objects, which are shared across processes.
Synchronization of access to these values is handled by taking
'_query_runners_lock'.
Testing:
- Ran the stress test for 1000 queries on tpch 500 on a 10-node
cluster. Previously, the run would reach a point where there were 0
'Active' queries and then hang.
Change-Id: I03b5e24b41fff1d33f9bb64b14a6d0b7a23a56e2
Reviewed-on: http://gerrit.cloudera.org:8080/12903
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/stress/concurrent_select.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index f373f96..50fed55 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -264,7 +264,7 @@ class StressRunner(object):
# These are the cumulative values of all the queries that have started/finished/-
# dequeued, etc. on runners that have already died. Every time we notice that a query
# runner has died, we update these values.
- self._past_runner_metrics = defaultdict(lambda: 0)
+ self._past_runner_metrics = defaultdict(lambda: Value("i", 0))
self._query_consumer_thread = None
self._mem_polling_thread = None
@@ -275,12 +275,14 @@ class StressRunner(object):
MUST hold '_query_runners_lock' before calling.
"""
for key, val in query_runner.get_metric_vals():
- self._past_runner_metrics[key] += val
+ self._past_runner_metrics[key].value += val
def _calc_total_runner_metrics(self):
""" Calculate the total of metrics across past and active query runners. """
- totals = copy(self._past_runner_metrics)
+ totals = defaultdict(lambda: 0)
with self._query_runners_lock:
+ for key in self._past_runner_metrics:
+ totals[key] = self._past_runner_metrics[key].value
for query_runner in self._query_runners:
for key, val in query_runner.get_metric_vals():
totals[key] += val
@@ -295,7 +297,7 @@ class StressRunner(object):
""" TODO: Get rid of this function after reformatting how we obtain query indices.
_query_runners_lock MUST be taken before calling this function.
"""
- total = self._past_runner_metrics[key]
+ total = self._past_runner_metrics[key].value
for runner in self._query_runners:
total += runner.get_metric_val(key)
return total