You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2019/02/12 04:06:58 UTC
[impala] 04/05: IMPALA-8173: Fix KeyError in run-workload.py
This is an automated email from the ASF dual-hosted git repository.
tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8b5cb576d23d771456ed571ee0d362d96af6ae13
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Thu Feb 7 14:09:38 2019 -0800
IMPALA-8173: Fix KeyError in run-workload.py
A recent change (IMPALA-7694) causes run-workload.py to fail with a
KeyError due to trying to construct an ImpalaBeeswaxResult without a
query id.
This patch fixes that issue and two related issues:
- This problem was not caught by automated testing, even though
run-all-tests.sh runs run-workload.py, because queries that failed to
produce results were silently ignored. The scheduler now raises an
error when the number of results generated is not the number expected.
- Fixes an issue where queries that fail to produce results would
confusingly produce the error:
'NoneType' object has no attribute 'time_taken'
Testing:
- Ran run-workload.py locally and demonstrated that it works now.
- Ran run-all-tests.sh locally and demonstrated that it fails now when
the KeyError issue isn't fixed.
Change-Id: I5b8a3c3dd7499335b9290d5667c194e8c0eabd12
Reviewed-on: http://gerrit.cloudera.org:8080/12397
Reviewed-by: Thomas Marshall <th...@cmu.edu>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/performance/query_exec_functions.py | 4 ++--
tests/performance/scheduler.py | 22 ++++++++++++++--------
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/tests/performance/query_exec_functions.py b/tests/performance/query_exec_functions.py
index 1c144bd..352c9a7 100644
--- a/tests/performance/query_exec_functions.py
+++ b/tests/performance/query_exec_functions.py
@@ -192,7 +192,7 @@ def execute_using_impala_beeswax(query, query_config):
# create a map for query options and the query names to send to the plugin
context = build_context(query, query_config)
if plugin_runner: plugin_runner.run_plugins_pre(context=context, scope="Query")
- result = ImpalaBeeswaxResult()
+ result = None
try:
result = client.execute(query.query_str)
except Exception, e:
@@ -232,7 +232,7 @@ def construct_exec_result(result, exec_result):
"""
# Return immedietely if the query failed.
- if not result.success: return exec_result
+ if result is None or not result.success: return exec_result
exec_result.success = True
attrs = ['data', 'runtime_profile', 'start_time',
'time_taken', 'summary', 'exec_summary']
diff --git a/tests/performance/scheduler.py b/tests/performance/scheduler.py
index 760b331..e612454 100644
--- a/tests/performance/scheduler.py
+++ b/tests/performance/scheduler.py
@@ -112,19 +112,20 @@ class Scheduler(object):
try:
query_executor.prepare(self._get_next_impalad())
query_executor.execute(plan_first=self.plan_first)
- # QueryExecutor only throws an exception if the query fails and abort_on_error
- # is set to True. If abort_on_error is False, then the exception is logged on
+ # QueryExecutor only throws an exception if the query fails and exit_on_error
+ # is set to True. If exit_on_error is False, then the exception is logged on
# the console and execution moves on to the next query.
except Exception as e:
LOG.error("Query %s Failed: %s" % (query_name, str(e)))
self._exit.set()
finally:
- LOG.info("%s query iteration %d finished in %.2f seconds" % (query_name, i+1,
- query_executor.result.time_taken))
- result = query_executor.result
- result.client_name = thread_num + 1
- self._results.append(result)
- workload_time_sec += query_executor.result.time_taken
+ if query_executor.result:
+ LOG.info("%s query iteration %d finished in %.2f seconds" %
+ (query_name, i + 1, query_executor.result.time_taken))
+ result = query_executor.result
+ result.client_name = thread_num + 1
+ self._results.append(result)
+ workload_time_sec += query_executor.result.time_taken
if self.query_iterations == 1:
LOG.info("Workload iteration %d finished in %s seconds" % (j+1, workload_time_sec))
cursor = getattr(threading.current_thread(), 'cursor', None)
@@ -139,3 +140,8 @@ class Scheduler(object):
for thread_num,t in enumerate(self._threads):
t.join()
LOG.info("Finished %s" % self._thread_name % thread_num)
+ num_expected_results = len(self._threads) * self.iterations * \
+ self.query_iterations * len(self.query_executors)
+ if len(self._results) != num_expected_results:
+ raise RuntimeError("Unexpected number of results generated (%s vs. %s)." %
+ (len(self._results), num_expected_results))