You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2019/02/12 04:06:58 UTC

[impala] 04/05: IMPALA-8173: Fix KeyError in run-workload.py

This is an automated email from the ASF dual-hosted git repository.

tmarshall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8b5cb576d23d771456ed571ee0d362d96af6ae13
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Thu Feb 7 14:09:38 2019 -0800

    IMPALA-8173: Fix KeyError in run-workload.py
    
    A recent change (IMPALA-7694) causes run-workload.py to fail with a
    KeyError due to trying to construct an ImpalaBeeswaxResult without a
    query id.
    
    This patch fixes that issue and two related issues:
    - The KeyError was not caught by automated testing, even though
      run-all-tests.sh runs run-workload.py, because queries that fail to
      produce results were silently ignored. The scheduler now raises an
      error when the number of results does not match the expected count.
    - Fixes an issue where queries that fail to produce results would
      confusingly produce the error:
      'NoneType' object has no attribute 'time_taken'
    
    Testing:
    - Ran run-workload.py locally and demonstrated that it works now.
    - Ran run-all-tests.sh locally and demonstrated that it fails now when
      the KeyError issue isn't fixed.
    
    Change-Id: I5b8a3c3dd7499335b9290d5667c194e8c0eabd12
    Reviewed-on: http://gerrit.cloudera.org:8080/12397
    Reviewed-by: Thomas Marshall <th...@cmu.edu>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/performance/query_exec_functions.py |  4 ++--
 tests/performance/scheduler.py            | 22 ++++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/performance/query_exec_functions.py b/tests/performance/query_exec_functions.py
index 1c144bd..352c9a7 100644
--- a/tests/performance/query_exec_functions.py
+++ b/tests/performance/query_exec_functions.py
@@ -192,7 +192,7 @@ def execute_using_impala_beeswax(query, query_config):
   # create a map for query options and the query names to send to the plugin
   context = build_context(query, query_config)
   if plugin_runner: plugin_runner.run_plugins_pre(context=context, scope="Query")
-  result = ImpalaBeeswaxResult()
+  result = None
   try:
     result = client.execute(query.query_str)
   except Exception, e:
@@ -232,7 +232,7 @@ def construct_exec_result(result, exec_result):
   """
 
   # Return immedietely if the query failed.
-  if not result.success: return exec_result
+  if result is None or not result.success: return exec_result
   exec_result.success = True
   attrs = ['data', 'runtime_profile', 'start_time',
       'time_taken', 'summary', 'exec_summary']
diff --git a/tests/performance/scheduler.py b/tests/performance/scheduler.py
index 760b331..e612454 100644
--- a/tests/performance/scheduler.py
+++ b/tests/performance/scheduler.py
@@ -112,19 +112,20 @@ class Scheduler(object):
           try:
             query_executor.prepare(self._get_next_impalad())
             query_executor.execute(plan_first=self.plan_first)
-          # QueryExecutor only throws an exception if the query fails and abort_on_error
-          # is set to True. If abort_on_error is False, then the exception is logged on
+          # QueryExecutor only throws an exception if the query fails and exit_on_error
+          # is set to True. If exit_on_error is False, then the exception is logged on
           # the console and execution moves on to the next query.
           except Exception as e:
             LOG.error("Query %s Failed: %s" % (query_name, str(e)))
             self._exit.set()
           finally:
-            LOG.info("%s query iteration %d finished in %.2f seconds" % (query_name, i+1,
-              query_executor.result.time_taken))
-            result = query_executor.result
-            result.client_name = thread_num + 1
-            self._results.append(result)
-          workload_time_sec += query_executor.result.time_taken
+            if query_executor.result:
+              LOG.info("%s query iteration %d finished in %.2f seconds" %
+                       (query_name, i + 1, query_executor.result.time_taken))
+              result = query_executor.result
+              result.client_name = thread_num + 1
+              self._results.append(result)
+              workload_time_sec += query_executor.result.time_taken
       if self.query_iterations == 1:
         LOG.info("Workload iteration %d finished in %s seconds" % (j+1, workload_time_sec))
       cursor = getattr(threading.current_thread(), 'cursor', None)
@@ -139,3 +140,8 @@ class Scheduler(object):
     for thread_num,t in enumerate(self._threads):
       t.join()
       LOG.info("Finished %s" % self._thread_name % thread_num)
+    num_expected_results = len(self._threads) * self.iterations * \
+        self.query_iterations * len(self.query_executors)
+    if len(self._results) != num_expected_results:
+      raise RuntimeError("Unexpected number of results generated (%s vs. %s)." %
+          (len(self._results), num_expected_results))