You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bh...@apache.org on 2018/04/13 23:00:14 UTC
[4/5] impala git commit: IMPALA-6845: TestHdfsQueries causes some
tests to be run twice
IMPALA-6845: TestHdfsQueries causes some tests to be run twice
TestHdfsQueries is a subclass of TestQueries and inherits all of its
'test_*' methods, causing these tests to be run twice any time
test_queries.py is run. This was not intentional (it was subclassed
just to inherit 'add_test_dimensions') and causes test runs to take
longer than necessary.
This patch removes the subclass relationship and copies the logic in
add_test_dimensions() from TestQueries into TestHdfsQueries, with a
convenience function added to minimize code duplication.
Testing:
- Ran test_queries.py under both 'core' and 'exhaustive' and checked
that the same tests are run, except that each is now run only a single time.
Change-Id: Ida659aa7b5131a6a7469baa93a41f7581bd0659a
Reviewed-on: http://gerrit.cloudera.org:8080/10053
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/16bed5c3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/16bed5c3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/16bed5c3
Branch: refs/heads/master
Commit: 16bed5c3a91d2575e1a0d3327735df15d51b5bf6
Parents: ffb74e7
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Apr 12 23:11:05 2018 +0000
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Apr 13 21:46:17 2018 +0000
----------------------------------------------------------------------
tests/common/test_dimensions.py | 17 ++++++++++++++++-
tests/query_test/test_queries.py | 27 ++++++++++++---------------
2 files changed, 28 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/common/test_dimensions.py
----------------------------------------------------------------------
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index df3f8c2..434b884 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -17,10 +17,11 @@
# Common test dimensions and associated utility functions.
+import copy
import os
from itertools import product
-from tests.common.test_vector import ImpalaTestDimension
+from tests.common.test_vector import ImpalaTestDimension, ImpalaTestVector
WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
@@ -179,6 +180,20 @@ def create_exec_option_dimension_from_dict(exec_option_dimensions):
# Build a test vector out of it
return ImpalaTestDimension('exec_option', *exec_option_dimension_values)
+def extend_exec_option_dimension(test_suite, key, value):
+ """
+ Takes an ImpalaTestSuite object 'test_suite' and extends the exec option test dimension
+ by creating a copy of each existing exec option value that has 'key' set to 'value',
+ doubling the number of tests that will be run.
+ """
+ dim = test_suite.ImpalaTestMatrix.dimensions["exec_option"]
+ new_value = []
+ for v in dim:
+ new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value)))
+ new_value[-1].value[key] = value
+ dim.extend(new_value)
+ test_suite.ImpalaTestMatrix.add_dimension(dim)
+
def get_dataset_from_workload(workload):
# TODO: We need a better way to define the workload -> dataset mapping so we can
# extract it without reading the actual test vector file
http://git-wip-us.apache.org/repos/asf/impala/blob/16bed5c3/tests/query_test/test_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index 4667d96..14ecefe 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -22,7 +22,7 @@ import pytest
import re
from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import create_uncompressed_text_dimension
+from tests.common.test_dimensions import create_uncompressed_text_dimension, extend_exec_option_dimension
from tests.common.test_vector import ImpalaTestVector
class TestQueries(ImpalaTestSuite):
@@ -33,18 +33,9 @@ class TestQueries(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format == 'parquet')
- # Manually adding a test dimension here to test the small query opt
- # in exhaustive.
- # TODO Cleanup required, allow adding values to dimensions without having to
- # manually explode them
+ # Adding a test dimension here to test the small query opt in exhaustive.
if cls.exploration_strategy() == 'exhaustive':
- dim = cls.ImpalaTestMatrix.dimensions["exec_option"]
- new_value = []
- for v in dim:
- new_value.append(ImpalaTestVector.Value(v.name, copy.copy(v.value)))
- new_value[-1].value["exec_single_node_rows_threshold"] = 100
- dim.extend(new_value)
- cls.ImpalaTestMatrix.add_dimension(dim)
+ extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100")
@classmethod
def get_workload(cls):
@@ -215,9 +206,7 @@ class TestQueriesParquetTables(ImpalaTestSuite):
self.run_test_case('QueryTest/single-node-large-sorts', vector)
# Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart.
-# This is a subclass of TestQueries to get the extra test dimension for
-# exec_single_node_rows_threshold in exhaustive.
-class TestHdfsQueries(TestQueries):
+class TestHdfsQueries(ImpalaTestSuite):
@classmethod
def add_test_dimensions(cls):
super(TestHdfsQueries, cls).add_test_dimensions()
@@ -225,6 +214,14 @@ class TestHdfsQueries(TestQueries):
cls.ImpalaTestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format != 'kudu')
+ # Adding a test dimension here to test the small query opt in exhaustive.
+ if cls.exploration_strategy() == 'exhaustive':
+ extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100")
+
+ @classmethod
+ def get_workload(cls):
+ return 'functional-query'
+
def test_hdfs_scan_node(self, vector):
self.run_test_case('QueryTest/hdfs-scan-node', vector)