You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/02/22 18:12:01 UTC
[2/6] incubator-impala git commit: IMPALA-4904, IMPALA-4914: add targeted-stress to exhaustive tests

IMPALA-4904,IMPALA-4914: add targeted-stress to exhaustive tests

This patch allows any test suites whose workload is "targeted-stress" to
be run in so-called "exhaustive" mode. Before this patch, only suites
whose workload was "functional-query" would be run exhaustively. More on
this flaw is in IMPALA-3947.

The net effects are:

1. We fix IMPALA-4904, which allows test_ddl_stress to start running
   again.
2. We also improve the situation in IMPALA-4914 by getting
   TestSpillStress to run, though we don't fix its
   not-running-concurrently problem.

The other mini-cluster stress tests were disabled in this commit:
  IMPALA-2605: Omit the sort and mini stress tests
so they are not directly affected here.

I also tried to clarify what "exhaustive" means in some of our shell
scripts, via help text and comments.

An exhaustive build+test run showed test_ddl_stress and TestSpillStress
now get run and passed. This adds roughly 12 minutes to a build that's
on the order of 13-14 hours.

Change-Id: Ie6bd5bbd380e636d680368e908519b042d79dfec
Reviewed-on: http://gerrit.cloudera.org:8080/6002
Tested-by: Impala Public Jenkins
Reviewed-by: Jim Apple <jb...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9414d53a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9414d53a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9414d53a

Branch: refs/heads/master
Commit: 9414d53a891448be13228f5fc63e089522e143aa
Parents: d2ae45b
Author: Michael Brown <mi...@cloudera.com>
Authored: Fri Feb 10 09:35:16 2017 -0800
Committer: Jim Apple <jb...@apache.org>
Committed: Tue Feb 21 21:12:36 2017 +0000

----------------------------------------------------------------------
 bin/run-all-tests.sh                  | 20 ++++++++++---
 buildall.sh                           | 19 +++++++-----
 tests/custom_cluster/test_spilling.py | 38 ++++++++++++++++++++----
 tests/stress/test_ddl_stress.py       | 47 ++++++++++++++++--------------
 4 files changed, 85 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 728c0e5..692b510 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -87,11 +87,23 @@ do
   esac
 done
 
-# The EXPLORATION_STRATEGY parameter should only apply to the
-# functional-query workload because the larger datasets (ex. tpch) are
-# not generated in all table formats.
+# IMPALA-3947: "Exhaustive" tests are actually based on workload. This
+# means what we colloquially call "exhaustive" tests are actually
+# "exhaustive tests whose workloads are in this set below". Not all
+# workloads are able to be exhaustively run through buildall.sh. For
+# example, the tpch workload is never run exhaustively, because the
+# relatively large size of tpch means data loading in all exhaustive
+# formats takes much longer and data load snapshots containing tpch in
+# all exhaustive formats are much larger to store and take longer to
+# load.
+#
+# XXX If you change the --workload_exploration_strategy set below,
+# please update the buildall.sh help text for -testexhaustive.
 COMMON_PYTEST_ARGS="--maxfail=${MAX_PYTEST_FAILURES} --exploration_strategy=core"`
-    `" --workload_exploration_strategy=functional-query:$EXPLORATION_STRATEGY"
+    `" --workload_exploration_strategy="`
+        `"functional-query:${EXPLORATION_STRATEGY},"`
+        `"targeted-stress:${EXPLORATION_STRATEGY}"
+
 if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
   # Only one impalad is supported when running against local filesystem.
   COMMON_PYTEST_ARGS+=" --impalad=localhost:21000"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index 291f19e..9c4385c 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -119,6 +119,8 @@ do
       ;;
     -testexhaustive)
       EXPLORATION_STRATEGY=exhaustive
+      # See bin/run-all-tests.sh and IMPALA-3947 for more information on
+      # what this means.
       ;;
     -snapshot_file)
       SNAPSHOT_FILE="${2-}"
@@ -172,7 +174,7 @@ do
       echo "[-noclean] : Omits cleaning all packages before building. Will not kill"\
            "running Hadoop services unless any -format* is True"
       echo "[-format] : Format the minicluster, metastore db, and sentry policy db"\
-           " [Default: False]"
+           "[Default: False]"
       echo "[-format_cluster] : Format the minicluster [Default: False]"
       echo "[-format_metastore] : Format the metastore db [Default: False]"
       echo "[-format_sentry_policy_db] : Format the Sentry policy db [Default: False]"
@@ -183,16 +185,17 @@ do
       echo "[-skiptests] : Skips execution of all tests"
       echo "[-notests] : Skips building and execution of all tests"
       echo "[-start_minicluster] : Start test cluster including Impala and all"\
-            " its dependencies. If already running, all services are restarted."\
-            " Regenerates test cluster config files. [Default: True if running "\
-            " tests or loading data, False otherwise]"
+           "its dependencies. If already running, all services are restarted."\
+           "Regenerates test cluster config files. [Default: True if running"\
+           "tests or loading data, False otherwise]"
       echo "[-start_impala_cluster] : Start Impala minicluster after build"\
-           " [Default: False]"
+           "[Default: False]"
       echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
            "test execution time)"
-      echo "[-testexhaustive] : Run tests in 'exhaustive' mode (significantly increases"\
-           "test execution time)"
-      echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is "\
+      echo "[-testexhaustive] : Run tests in 'exhaustive' mode, which significantly"\
+           "increases test execution time. ONLY APPLIES to suites with workloads:"\
+           "functional-query, targeted-stress"
+      echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is"\
            "specified. If -snapshot_file is not specified, data will be regenerated."
       echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
       echo "[-metastore_snapshot_file <file_name>]: Load the hive metastore snapshot"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/tests/custom_cluster/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_spilling.py b/tests/custom_cluster/test_spilling.py
index 7f90a8e..df69a67 100644
--- a/tests/custom_cluster/test_spilling.py
+++ b/tests/custom_cluster/test_spilling.py
@@ -19,28 +19,56 @@ import pytest
 from copy import deepcopy
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-from tests.common.test_dimensions import (ImpalaTestDimension,
+from tests.common.test_dimensions import (
+    ImpalaTestDimension,
     create_single_exec_option_dimension,
     create_parquet_dimension)
 
+
 class TestSpillStress(CustomClusterTestSuite):
+
+  # IMPALA-4914 part 3 TODO: In the agg_stress workload file used below, it's implied
+  # that the workload needs to run concurrently. This appears to attempt to try to do
+  # that by using TEST_IDS and NUM_ITERATIONS and assuming multiple instances of the
+  # test will run concurrently. But custom cluster tests are explicitly never run
+  # concurrently. If this test ought to run concurrently, then new test infra is needed
+  # to achieve that.
+
   @classmethod
   def get_workload(self):
     return 'targeted-stress'
 
   @classmethod
   def setup_class(cls):
-    super(TestSpillStress, cls).setup_class()
+    # Don't do anything expensive if we're not running exhaustively for this workload.
+    # This check comes first.
     if cls.exploration_strategy() != 'exhaustive':
       pytest.skip('runs only in exhaustive')
     # Since test_spill_stress below runs TEST_IDS * NUM_ITERATIONS times, but we only
     # need Impala to start once, it's inefficient to use
     # @CustomClusterTestSuite.with_args() to restart Impala every time. Instead, start
-    # Impala here, once.
+    # Impala here, once. We start impalad before we try to make connections to it.
     #
     # Start with 256KB buffers to reduce data size required to force spilling.
-    cls._start_impala_cluster(['--impalad_args=--"read_size=262144"',
-        'catalogd_args="--load_catalog_in_background=false"'])
+    cls._start_impala_cluster([
+        '--impalad_args=--"read_size=262144"',
+        '--catalogd_args="--load_catalog_in_background=false"'])
+    # This super() call leapfrogs CustomClusterTestSuite.setup_class() and calls
+    # ImpalaTestSuite.setup_class(). This is done because this is an atypical custom
+    # cluster test that doesn't restart impalad per test method, but it still needs
+    # self.client (etc.) set up. IMPALA-4914 part 3 would address this.
+    # This happens last because we need impalad started above before we try to establish
+    # connections to it.
+    super(CustomClusterTestSuite, cls).setup_class()
+
+  @classmethod
+  def teardown_class(cls):
+    # This teardown leapfrog matches the setup leapfrog above, for equivalent reasons.
+    # CustomClusterTestSuite.teardown_class() overrides ImpalaTestSuite.teardown_class()
+    # and is a no-op. ImpalaTestSuite.teardown_class() stops client connections. The
+    # *next* custom cluster test that py.test chooses to run is responsible for
+    # restarting impalad.
+    super(CustomClusterTestSuite, cls).teardown_class()
 
   def setup_method(self, method):
     # We don't need CustomClusterTestSuite.setup_method() or teardown_method() here

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/tests/stress/test_ddl_stress.py
----------------------------------------------------------------------
diff --git a/tests/stress/test_ddl_stress.py b/tests/stress/test_ddl_stress.py
index 5a55f5d..9aa6de5 100644
--- a/tests/stress/test_ddl_stress.py
+++ b/tests/stress/test_ddl_stress.py
@@ -16,21 +16,22 @@
 # under the License.
 
 import pytest
-import uuid
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal
-from tests.common.test_vector import ImpalaTestDimension
 
 # Number of tables to create per thread
 NUM_TBLS_PER_THREAD = 10
 
 # Each client will get a different test id.
-TEST_IDS = xrange(0, 10)
+TEST_INDICES = xrange(10)
+
 
 # Simple stress test for DDL operations. Attempts to create, cache,
 # uncache, then drop many different tables in parallel.
 class TestDdlStress(ImpalaTestSuite):
+  SHARED_DATABASE = 'test_ddl_stress_db'
+
   @classmethod
   def get_workload(self):
     return 'targeted-stress'
@@ -42,38 +43,40 @@ class TestDdlStress(ImpalaTestSuite):
     if cls.exploration_strategy() != 'exhaustive':
       pytest.skip("Should only run in exhaustive due to long execution time.")
 
-    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('test_id', *TEST_IDS))
     cls.ImpalaTestMatrix.add_constraint(
-        lambda v: v.get_value('exec_option')['batch_size'] == 0)
-
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format == 'text' and\
-        v.get_value('table_format').compression_codec == 'none')
+        lambda v: (v.get_value('table_format').file_format == 'text' and
+                   v.get_value('table_format').compression_codec == 'none'))
 
   @SkipIfS3.caching
   @SkipIfIsilon.caching
   @SkipIfLocal.caching
   @pytest.mark.stress
-  def test_create_cache_many_tables(self, vector):
+  @pytest.mark.parametrize('test_index', TEST_INDICES)
+  def test_create_cache_many_tables(self, vector, testid_checksum, test_index):
     self.client.set_configuration(vector.get_value('exec_option'))
-    self.client.execute("create database if not exists ddl_stress_testdb")
-    self.client.execute("use ddl_stress_testdb")
+    # Don't use unique_database so that we issue several "create database" statements
+    # rather simultaneously on the same object.
+    self.client.execute("create database if not exists {0}".format(self.SHARED_DATABASE))
 
-    # Since this test runs concurrently, UUIDs generated by separate processes may
-    # (arguably) not be guaranteed to be unique. To be certain, we add the test ID to the
-    # tables we create.
-    test_id = vector.vector_values[0].value
-    tbl_uniquifier = "proc_%s_%s" % (test_id, str(uuid.uuid4()).replace('-', ''))
     for i in xrange(NUM_TBLS_PER_THREAD):
-      tbl_name = "tmp_%s_%s" % (tbl_uniquifier, i)
+      tbl_name = "{db}.test_{checksum}_{i}".format(
+          db=self.SHARED_DATABASE,
+          checksum=testid_checksum,
+          i=i)
+      # Because we're using a shared database, first clear potential tables from any
+      # previous test runs that failed to properly clean up
+      self.client.execute("drop table if exists {0}".format(tbl_name))
+      self.client.execute("drop table if exists {0}_part".format(tbl_name))
       # Create a partitioned and unpartitioned table
       self.client.execute("create table %s (i int)" % tbl_name)
-      self.client.execute("create table %s_part (i int) partitioned by (j int)" %\
-          tbl_name)
+      self.client.execute(
+          "create table %s_part (i int) partitioned by (j int)" % tbl_name)
       # Add some data to each
-      self.client.execute("insert overwrite table %s select int_col from "\
+      self.client.execute(
+          "insert overwrite table %s select int_col from "
           "functional.alltypestiny" % tbl_name)
-      self.client.execute("insert overwrite table %s_part partition(j) "\
+      self.client.execute(
+          "insert overwrite table %s_part partition(j) "
           "values (1, 1), (2, 2), (3, 3), (4, 4), (4, 4)" % tbl_name)
 
       # Cache the data the unpartitioned table