You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2018/03/26 22:33:07 UTC

[2/2] impala git commit: IMPALA-6715, IMPALA-6736: fix stress TPC workload selection

IMPALA-6715,IMPALA-6736: fix stress TPC workload selection

IMPALA-6715:
This commit
  IMPALA-6551: Change Kudu TPCDS and TPCH columns to DECIMAL
added additional decimal_v2 queries to the stress test that amount to
running the same query twice. This makes the binary search run
incredibly slow.

- Fix the query selection. Add additional queries that weren't matching
  before, like the tpcds-q[0-9]+a.test series.

- Add a test that will at least ensure if
  testdata/workloads/tpc*/queries is modified, the stress test will
  still find the same number of queries for the given workload. There's
  no obvious place to put this test: it's not testing the product at
  all, so:

- Add a new directory tests/infra for such tests and add it to
  tests/run-tests.py.

- Move the test from IMPALA-6441 into tests/infra.

Testing:
- Core private build passed. I manually looked to make sure the moved
  and new tests ran.

- Short stress test run. I checked the runtime info and saw the new
  TPCDS queries in the JSON.

- While testing on hardware clusters down stream, I noticed...

IMPALA-6736:
  TPC-DS Q67A is 10x more expensive to run without spilling than any
  other query. I fixed the --filter-query-mem-ratio option to work. This
  will still run Q67A during the binary search phase, but if a cluster
  is too small, the query will be skipped.

Change-Id: I3e26b64d38aa8d63a176daf95c4ac5dee89508da
Reviewed-on: http://gerrit.cloudera.org:8080/9758
Reviewed-by: David Knupp <dk...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/cd939a24
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/cd939a24
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/cd939a24

Branch: refs/heads/master
Commit: cd939a2415077781d4fee258257f097eaa68cf4a
Parents: 52a2996
Author: Michael Brown <mi...@cloudera.com>
Authored: Wed Mar 21 13:08:50 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon Mar 26 22:26:12 2018 +0000

----------------------------------------------------------------------
 tests/infra/test_stress_infra.py  | 60 ++++++++++++++++++++++++++++++++++
 tests/metadata/test_explain.py    | 22 -------------
 tests/run-tests.py                |  2 +-
 tests/stress/concurrent_select.py | 29 ++++++++++++++--
 4 files changed, 87 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/cd939a24/tests/infra/test_stress_infra.py
----------------------------------------------------------------------
diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py
new file mode 100644
index 0000000..58f7625
--- /dev/null
+++ b/tests/infra/test_stress_infra.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This module attempts to enforce infrastructural assumptions that bind test tools to
+# product or other constraints. We want to stop these assumptions from breaking at
+# pre-commit time, not later.
+
+import pytest
+
+from decimal import Decimal
+
+from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.stress.concurrent_select import (
+    EXPECTED_TPCDS_QUERIES_COUNT,
+    EXPECTED_TPCH_NESTED_QUERIES_COUNT,
+    EXPECTED_TPCH_QUERIES_COUNT,
+    load_tpc_queries,
+    match_memory_estimate)
+
+
+class TestStressInfra(ImpalaTestSuite):
+
+  def test_stress_binary_search_start_point(self):
+    """
+    Test that the stress test can use EXPLAIN to find the start point for its binary
+    search.
+    """
+    result = self.client.execute("explain select 1")
+    mem_limit, units = match_memory_estimate(result.data)
+    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
+        'unexpected units {u} from explain memory estimation\n{output}:'.format(
+            u=units, output='\n'.join(result.data)))
+    assert Decimal(mem_limit) >= 0, (
+        'unexpected value from explain\n:' + '\n'.join(result.data))
+
+  @pytest.mark.parametrize(
+      'count_map',
+      [('tpcds', EXPECTED_TPCDS_QUERIES_COUNT),
+       ('tpch_nested', EXPECTED_TPCH_NESTED_QUERIES_COUNT),
+       ('tpch', EXPECTED_TPCH_QUERIES_COUNT)])
+  def test_stress_finds_workloads(self, count_map):
+    """
+    Test that the stress test will properly load TPC workloads.
+    """
+    workload, count = count_map
+    assert count == len(load_tpc_queries(workload))

http://git-wip-us.apache.org/repos/asf/impala/blob/cd939a24/tests/metadata/test_explain.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py
index 22fc177..3ad411a 100644
--- a/tests/metadata/test_explain.py
+++ b/tests/metadata/test_explain.py
@@ -19,11 +19,8 @@
 #
 import re
 
-from decimal import Decimal
-
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
-from tests.stress.concurrent_select import match_memory_estimate
 from tests.util.filesystem_utils import WAREHOUSE
 
 # Tests the different explain levels [0-3] on a few queries.
@@ -178,22 +175,3 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
     assert "missing relevant table and/or column statistics" in explain_result
     # Also test IMPALA-1530 - adding the number of partitions missing stats
     assert "partitions: 1/2 " in explain_result
-
-
-class TestInfraIntegration(ImpalaTestSuite):
-  """
-  This is a test suite to ensure separate test tooling in Python is compatible with the
-  product.
-  """
-  def test_stress_binary_search_start_point(self):
-    """
-    Test that the stress test can use EXPLAIN to find the start point for its binary
-    search.
-    """
-    result = self.client.execute("explain select 1")
-    mem_limit, units = match_memory_estimate(result.data)
-    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
-        'unexpected units {u} from explain memory estimation\n{output}:'.format(
-            u=units, output='\n'.join(result.data)))
-    assert Decimal(mem_limit) >= 0, (
-        'unexpected value from explain\n:' + '\n'.join(result.data))

http://git-wip-us.apache.org/repos/asf/impala/blob/cd939a24/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 9902fc9..3a8bd2e 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -34,7 +34,7 @@ from _pytest.config import FILE_OR_DIR
 # We whitelist valid test directories. If a new test directory is added, update this.
 VALID_TEST_DIRS = ['failure', 'query_test', 'stress', 'unittests', 'aux_query_tests',
                    'shell', 'hs2', 'catalog_service', 'metadata', 'data_errors',
-                   'statestore']
+                   'statestore', 'infra']
 
 TEST_DIR = os.path.join(os.environ['IMPALA_HOME'], 'tests')
 RESULT_DIR = os.path.join(os.environ['IMPALA_EE_TEST_LOGS_DIR'], 'results')

http://git-wip-us.apache.org/repos/asf/impala/blob/cd939a24/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index 5146d35..fa8541c 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -84,6 +84,14 @@ from tests.util.thrift_util import op_handle_to_query_id
 
 LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
 
+# IMPALA-6715: Every so often the stress test or the TPC workload directories get
+# changed, and the stress test loses the ability to run the full set of queries. Set
+# these constants and assert that when a workload is used, all the queries we expect to
+# use are there.
+EXPECTED_TPCDS_QUERIES_COUNT = 71
+EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
+EXPECTED_TPCH_QUERIES_COUNT = 22
+
 # Used to short circuit a binary search of the min mem limit. Values will be considered
 # equal if they are within this ratio or absolute amount of each other.
 MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
@@ -1065,7 +1073,10 @@ def load_tpc_queries(workload):
   queries = list()
   query_dir = os.path.join(
       os.path.dirname(__file__), "..", "..", "testdata", "workloads", workload, "queries")
-  file_name_pattern = re.compile(r"-(q\d+).test$")
+  # IMPALA-6715 and others from the past: This pattern enforces the queries we actually
+  # find. Both workload directories contain other queries that are not part of the TPC
+  # spec.
+  file_name_pattern = re.compile(r"^{0}-(q.*).test$".format(workload))
   for query_file in os.listdir(query_dir):
     match = file_name_pattern.search(query_file)
     if not match:
@@ -1956,6 +1967,7 @@ def main():
   # the TPC queries are expected to always complete successfully.
   if args.tpcds_db:
     tpcds_queries = load_tpc_queries("tpcds")
+    assert len(tpcds_queries) == EXPECTED_TPCDS_QUERIES_COUNT
     for query in tpcds_queries:
       query.db_name = args.tpcds_db
     queries.extend(tpcds_queries)
@@ -1964,6 +1976,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_db:
     tpch_queries = load_tpc_queries("tpch")
+    assert len(tpch_queries) == EXPECTED_TPCH_QUERIES_COUNT
     for query in tpch_queries:
       query.db_name = args.tpch_db
     queries.extend(tpch_queries)
@@ -1972,6 +1985,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_nested_db:
     tpch_nested_queries = load_tpc_queries("tpch_nested")
+    assert len(tpch_nested_queries) == EXPECTED_TPCH_NESTED_QUERIES_COUNT
     for query in tpch_nested_queries:
       query.db_name = args.tpch_nested_db
     queries.extend(tpch_nested_queries)
@@ -1980,6 +1994,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_kudu_db:
     tpch_kudu_queries = load_tpc_queries("tpch")
+    assert len(tpch_kudu_queries) == EXPECTED_TPCH_QUERIES_COUNT
     for query in tpch_kudu_queries:
       query.db_name = args.tpch_kudu_db
     queries.extend(tpch_kudu_queries)
@@ -1992,6 +2007,7 @@ def main():
         queries.extend(generate_DML_queries(cursor, args.dml_mod_values))
   if args.tpcds_kudu_db:
     tpcds_kudu_queries = load_tpc_queries("tpcds")
+    assert len(tpcds_kudu_queries) == EXPECTED_TPCDS_QUERIES_COUNT
     for query in tpcds_kudu_queries:
       query.db_name = args.tpcds_kudu_db
     queries.extend(tpcds_kudu_queries)
@@ -2049,10 +2065,17 @@ def main():
 
     # Remove any queries that would use "too many" resources. This way a larger number
     # of queries will run concurrently.
+    if query.required_mem_mb_without_spilling is not None and \
+        query.required_mem_mb_without_spilling / float(impala.min_impalad_mem_mb) \
+            > args.filter_query_mem_ratio:
+      LOG.debug(
+          "Filtering non-spilling query that exceeds "
+          "--filter-query-mem-ratio: " + query.sql)
+      query.required_mem_mb_without_spilling = None
     if query.required_mem_mb_with_spilling is None \
-        or query.required_mem_mb_with_spilling / impala.min_impalad_mem_mb \
+        or query.required_mem_mb_with_spilling / float(impala.min_impalad_mem_mb) \
             > args.filter_query_mem_ratio:
-      LOG.debug("Filtered query due to mem ratio option: " + query.sql)
+      LOG.debug("Filtering query that exceeds --filter-query-mem-ratio: " + query.sql)
       del queries[idx]
 
   # Remove queries that have a nested loop join in the plan.