You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bh...@apache.org on 2018/04/13 23:00:14 UTC
[4/5] impala git commit: IMPALA-6845: TestHdfsQueries causes some
tests to be run twice
IMPALA-6845: TestHdfsQueries causes some tests to be run twice
TestHdfsQueries is a subclass of TestQueries and inherits all of its
'test_*' methods, causing these tests to be run twice any time
test_queries.py is run. This was not intentional (it was subclassed
just to inherit 'add_test_dimensions') and causes test runs to take
longer than necessary.
This patch removes the subclass relationship and copies the logic in
add_test_dimensions() from TestQueries into TestHdfsQueries, with a
convenience function added to minimize code duplication.
Testing:
- Ran test_queries.py under both 'core' and 'exhaustive' and checked
that the same tests are run, except that each is now run only a single time.
Change-Id: Ida659aa7b5131a6a7469baa93a41f7581bd0659a
Reviewed-on: http://gerrit.cloudera.org:8080/10053
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/16bed5c3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/16bed5c3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/16bed5c3
Branch: refs/heads/master
Commit: 16bed5c3a91d2575e1a0d3327735df15d51b5bf6
Parents: ffb74e7
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Apr 12 23:11:05 2018 +0000
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Apr 13 21:46:17 2018 +0000
----------------------------------------------------------------------
tests/common/test_dimensions.py | 17 ++++++++++++++++-
tests/query_test/test_queries.py | 27 ++++++++++++---------------
2 files changed, 28 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/common/test_dimensions.py
----------------------------------------------------------------------
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index df3f8c2..434b884 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -17,10 +17,11 @@
# Common test dimensions and associated utility functions.
+import copy
import os
from itertools import product
-from tests.common.test_vector import ImpalaTestDimension
+from tests.common.test_vector import ImpalaTestDimension, ImpalaTestVector
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
@@ -179,6 +180,20 @@ def create_exec_option_dimension_from_dict(exec_option_dimensions):
# Build a test vector out of it
return ImpalaTestDimension('exec_option', *exec_option_dimension_values)
+def extend_exec_option_dimension(test_suite, key, value):
+ """
+ Takes an ImpalaTestSuite object 'test_suite' and extends the exec option test dimension
+ by creating a copy of each existing exec option value that has 'key' set to 'value',
+ doubling the number of tests that will be run.
+ """
+ dim = test_suite.ImpalaTestMatrix.dimensions["exec_option"]
+ new_value = []
+ for v in dim:
+ new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value)))
+ new_value[-1].value[key] = value
+ dim.extend(new_value)
+ test_suite.ImpalaTestMatrix.add_dimension(dim)
+
def get_dataset_from_workload(workload):
# TODO: We need a better way to define the workload -> dataset mapping so we can
# extract it without reading the actual test vector file
http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/query_test/test_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index 4667d96..14ecefe 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -22,7 +22,7 @@ import pytest
import re
from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import create_uncompressed_text_dimension
+from tests.common.test_dimensions import create_uncompressed_text_dimension, extend_exec_option_dimension
from tests.common.test_vector import ImpalaTestVector
class TestQueries(ImpalaTestSuite):
@@ -33,18 +33,9 @@ class TestQueries(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format == 'parquet')
- # Manually adding a test dimension here to test the small query opt
- # in exhaustive.
- # TODO Cleanup required, allow adding values to dimensions without having to
- # manually explode them
+ # Adding a test dimension here to test the small query opt in exhaustive.
if cls.exploration_strategy() == 'exhaustive':
- dim = cls.ImpalaTestMatrix.dimensions["exec_option"]
- new_value = []
- for v in dim:
- new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value)))
- new_value[-1].value["exec_single_node_rows_threshold"] = 100
- dim.extend(new_value)
- cls.ImpalaTestMatrix.add_dimension(dim)
+ extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100")
@classmethod
def get_workload(cls):
@@ -215,9 +206,7 @@ class TestQueriesParquetTables(ImpalaTestSuite):
self.run_test_case('QueryTest/single-node-large-sorts', vector)
# Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart.
-# This is a subclass of TestQueries to get the extra test dimension for
-# exec_single_node_rows_threshold in exhaustive.
-class TestHdfsQueries(TestQueries):
+class TestHdfsQueries(ImpalaTestSuite):
@classmethod
def add_test_dimensions(cls):
super(TestHdfsQueries, cls).add_test_dimensions()
@@ -225,6 +214,14 @@ class TestHdfsQueries(TestQueries):
cls.ImpalaTestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format != 'kudu')
+ # Adding a test dimension here to test the small query opt in exhaustive.
+ if cls.exploration_strategy() == 'exhaustive':
+ extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100")
+
+ @classmethod
+ def get_workload(cls):
+ return 'functional-query'
+
def test_hdfs_scan_node(self, vector):
self.run_test_case('QueryTest/hdfs-scan-node', vector)