You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2018/10/11 19:54:15 UTC

[10/10] impala git commit: IMPALA-7693: stress test: fix Query().name

IMPALA-7693: stress test: fix Query().name

In the refactor as part of IMPALA-7460, loading of TPC queries no longer
returned query names (e.g., Q37). The name's presence doesn't change the
behavior of the stress test, but it does lead to nicer, more debuggable and
readable things, like log messages, profiles, result hashes, and runtime
info.

- Change load_tpc_queries() to return a dictionary, not a list.
- Set the .name attribute.
- Enhance the unit test to at least ensure load_tpc_queries() does not
  regress again.

Testing, in addition to passing the test above:
- Ran stress test and performed binary search. Made sure query names
  were present in logging, runtime info, result hashes, and profiles.

Change-Id: Ie8c40beababf4c122dc8fed6c0544ee37871b9b2
Reviewed-on: http://gerrit.cloudera.org:8080/11651
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Michael Brown <mi...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/00471912
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/00471912
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/00471912

Branch: refs/heads/master
Commit: 0047191262d6a90eb704dff880efe6e625b805bc
Parents: 0cbe37a
Author: Michael Brown <mi...@cloudera.com>
Authored: Wed Oct 10 14:34:29 2018 -0700
Committer: Michael Brown <mi...@cloudera.com>
Committed: Thu Oct 11 16:43:14 2018 +0000

----------------------------------------------------------------------
 tests/infra/test_stress_infra.py  | 5 ++++-
 tests/stress/concurrent_select.py | 5 +++--
 tests/util/test_file_parser.py    | 5 +++--
 3 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/infra/test_stress_infra.py
----------------------------------------------------------------------
diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py
index 7e97ffa..cd9fd46 100644
--- a/tests/infra/test_stress_infra.py
+++ b/tests/infra/test_stress_infra.py
@@ -55,4 +55,7 @@ class TestStressInfra(ImpalaTestSuite):
     Test that the stress test will properly load TPC workloads.
     """
     workload, count = count_map
-    assert count == len(load_tpc_queries(workload))
+    queries = load_tpc_queries(workload)
+    assert count == len(queries)
+    for name in queries:
+      assert name.startswith('q')

http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index 844c245..688cdd4 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -1307,9 +1307,10 @@ def load_tpc_queries(workload):
   """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
   LOG.info("Loading %s queries", workload)
   queries = []
-  for query_text in test_file_parser.load_tpc_queries(workload):
+  for query_name, query_sql in test_file_parser.load_tpc_queries(workload).iteritems():
     query = Query()
-    query.sql = query_text
+    query.name = query_name
+    query.sql = query_sql
     queries.append(query)
   return queries
 

http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/util/test_file_parser.py
----------------------------------------------------------------------
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index ccd0af5..3b935d5 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -320,7 +320,7 @@ def write_test_file(test_file_name, test_file_sections, encoding=None):
 def load_tpc_queries(workload):
   """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
   LOG.info("Loading %s queries", workload)
-  queries = list()
+  queries = dict()
   query_dir = os.path.join(
       os.environ['IMPALA_HOME'], "testdata", "workloads", workload, "queries")
   # IMPALA-6715 and others from the past: This pattern enforces the queries we actually
@@ -331,6 +331,7 @@ def load_tpc_queries(workload):
     match = file_name_pattern.search(query_file)
     if not match:
       continue
+    query_name = match.group(1)
     file_path = os.path.join(query_dir, query_file)
     test_cases = parse_query_test_file(file_path)
     file_queries = list()
@@ -341,5 +342,5 @@ def load_tpc_queries(workload):
       raise Exception(
           "Expected exactly 1 query to be in file %s but got %s"
           % (file_path, len(file_queries)))
-    queries.append(file_queries[0])
+    queries[query_name] = file_queries[0]
   return queries